[lld] [lld] Add support for EC code map. (PR #69101)
via llvm-commits
llvm-commits at lists.llvm.org
Sun Oct 15 03:03:19 PDT 2023
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-lld
Author: Jacek Caban (cjacek)
<details>
<summary>Changes</summary>
This is a refreshed version of https://reviews.llvm.org/D157148 and depends on #<!-- -->69100. More info about my findings is here: https://wiki.winehq.org/ARM64ECToolchain#Code_layout
cc @<!-- -->bylaws
---
Patch is 27.84 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/69101.diff
9 Files Affected:
- (modified) lld/COFF/Chunks.cpp (+16-2)
- (modified) lld/COFF/Chunks.h (+55-2)
- (modified) lld/COFF/DLL.cpp (+8)
- (modified) lld/COFF/Driver.cpp (+5)
- (modified) lld/COFF/Writer.cpp (+97-5)
- (modified) lld/COFF/Writer.h (+6)
- (added) lld/test/COFF/Inputs/loadconfig-arm64ec.s (+97)
- (added) lld/test/COFF/arm64ec-codemap.test (+159)
- (added) lld/test/COFF/arm64ec-reloc.test (+37)
``````````diff
diff --git a/lld/COFF/Chunks.cpp b/lld/COFF/Chunks.cpp
index e17b64df869fe88..39f4575031be549 100644
--- a/lld/COFF/Chunks.cpp
+++ b/lld/COFF/Chunks.cpp
@@ -437,7 +437,7 @@ void SectionChunk::applyRelocation(uint8_t *off,
// Compute the RVA of the relocation for relative relocations.
uint64_t p = rva + rel.VirtualAddress;
uint64_t imageBase = file->ctx.config.imageBase;
- switch (file->ctx.config.machine) {
+ switch (getMachine()) {
case AMD64:
applyRelX64(off, rel.Type, os, s, p, imageBase);
break;
@@ -551,7 +551,7 @@ static uint8_t getBaserelType(const coff_relocation &rel,
// Only called when base relocation is enabled.
void SectionChunk::getBaserels(std::vector<Baserel> *res) {
for (const coff_relocation &rel : getRelocs()) {
- uint8_t ty = getBaserelType(rel, file->ctx.config.machine);
+ uint8_t ty = getBaserelType(rel, getMachine());
if (ty == IMAGE_REL_BASED_ABSOLUTE)
continue;
Symbol *target = file->getSymbol(rel.SymbolTableIndex);
@@ -896,6 +896,20 @@ void RVAFlagTableChunk::writeTo(uint8_t *buf) const {
"RVA tables should be de-duplicated");
}
+size_t ECCodeMapChunk::getSize() const {
+ return map.size() * sizeof(chpe_range_entry);
+}
+
+void ECCodeMapChunk::writeTo(uint8_t *buf) const {
+ auto table = reinterpret_cast<chpe_range_entry *>(buf);
+ for (uint32_t i = 0; i < map.size(); i++) {
+ const ECCodeMapEntry &entry = map[i];
+ uint32_t start = entry.first->getRVA();
+ table[i].StartOffset = start | entry.type;
+ table[i].Length = entry.last->getRVA() + entry.last->getSize() - start;
+ }
+}
+
// MinGW specific, for the "automatic import of variables from DLLs" feature.
size_t PseudoRelocTableChunk::getSize() const {
if (relocs.empty())
diff --git a/lld/COFF/Chunks.h b/lld/COFF/Chunks.h
index 3d605e6ab10c8c8..4db9505611e459f 100644
--- a/lld/COFF/Chunks.h
+++ b/lld/COFF/Chunks.h
@@ -24,10 +24,11 @@
namespace lld::coff {
using llvm::COFF::ImportDirectoryTableEntry;
-using llvm::object::COFFSymbolRef;
-using llvm::object::SectionRef;
+using llvm::object::chpe_range_type;
using llvm::object::coff_relocation;
using llvm::object::coff_section;
+using llvm::object::COFFSymbolRef;
+using llvm::object::SectionRef;
class Baserel;
class Defined;
@@ -114,6 +115,9 @@ class Chunk {
// synthesized by the linker.
bool isHotPatchable() const;
+ MachineTypes getMachine() const;
+ chpe_range_type getArm64ECRangeType() const;
+
protected:
Chunk(Kind k = OtherKind) : chunkKind(k), hasData(true), p2Align(0) {}
@@ -164,6 +168,8 @@ class NonSectionChunk : public Chunk {
// Collect all locations that contain absolute addresses for base relocations.
virtual void getBaserels(std::vector<Baserel> *res) {}
+ virtual MachineTypes getMachine() const { return IMAGE_FILE_MACHINE_UNKNOWN; }
+
// Returns a human-readable name of this chunk. Chunks are unnamed chunks of
// bytes, so this is used only for logging or debugging.
virtual StringRef getDebugName() const { return ""; }
@@ -219,6 +225,8 @@ class SectionChunk final : public Chunk {
ArrayRef<uint8_t> getContents() const;
void writeTo(uint8_t *buf) const;
+ MachineTypes getMachine() const { return file->getMachineType(); }
+
// Defend against unsorted relocations. This may be overly conservative.
void sortRelocations();
@@ -418,6 +426,24 @@ inline StringRef Chunk::getDebugName() const {
return static_cast<const NonSectionChunk *>(this)->getDebugName();
}
+inline MachineTypes Chunk::getMachine() const {
+ if (isa<SectionChunk>(this))
+ return static_cast<const SectionChunk *>(this)->getMachine();
+ else
+ return static_cast<const NonSectionChunk *>(this)->getMachine();
+}
+
+inline chpe_range_type Chunk::getArm64ECRangeType() const {
+ switch (getMachine()) {
+ case AMD64:
+ return chpe_range_type::Amd64;
+ case ARM64EC:
+ return chpe_range_type::Arm64EC;
+ default:
+ return chpe_range_type::Arm64;
+ }
+}
+
// This class is used to implement an lld-specific feature (not implemented in
// MSVC) that minimizes the output size by finding string literals sharing tail
// parts and merging them.
@@ -504,6 +530,7 @@ class ImportThunkChunkX64 : public ImportThunkChunk {
explicit ImportThunkChunkX64(COFFLinkerContext &ctx, Defined *s);
size_t getSize() const override { return sizeof(importThunkX86); }
void writeTo(uint8_t *buf) const override;
+ MachineTypes getMachine() const override { return AMD64; }
};
class ImportThunkChunkX86 : public ImportThunkChunk {
@@ -513,6 +540,7 @@ class ImportThunkChunkX86 : public ImportThunkChunk {
size_t getSize() const override { return sizeof(importThunkX86); }
void getBaserels(std::vector<Baserel> *res) override;
void writeTo(uint8_t *buf) const override;
+ MachineTypes getMachine() const override { return I386; }
};
class ImportThunkChunkARM : public ImportThunkChunk {
@@ -524,6 +552,7 @@ class ImportThunkChunkARM : public ImportThunkChunk {
size_t getSize() const override { return sizeof(importThunkARM); }
void getBaserels(std::vector<Baserel> *res) override;
void writeTo(uint8_t *buf) const override;
+ MachineTypes getMachine() const override { return ARMNT; }
};
class ImportThunkChunkARM64 : public ImportThunkChunk {
@@ -534,6 +563,7 @@ class ImportThunkChunkARM64 : public ImportThunkChunk {
}
size_t getSize() const override { return sizeof(importThunkARM64); }
void writeTo(uint8_t *buf) const override;
+ MachineTypes getMachine() const override { return ARM64; }
};
class RangeExtensionThunkARM : public NonSectionChunk {
@@ -544,6 +574,7 @@ class RangeExtensionThunkARM : public NonSectionChunk {
}
size_t getSize() const override;
void writeTo(uint8_t *buf) const override;
+ MachineTypes getMachine() const override { return ARMNT; }
Defined *target;
@@ -559,6 +590,7 @@ class RangeExtensionThunkARM64 : public NonSectionChunk {
}
size_t getSize() const override;
void writeTo(uint8_t *buf) const override;
+ MachineTypes getMachine() const override { return ARM64; }
Defined *target;
@@ -663,6 +695,27 @@ class EmptyChunk : public NonSectionChunk {
void writeTo(uint8_t *buf) const override {}
};
+class ECCodeMapEntry {
+public:
+ ECCodeMapEntry(Chunk *first, Chunk *last, chpe_range_type type)
+ : first(first), last(last), type(type) {}
+ Chunk *first;
+ Chunk *last;
+ chpe_range_type type;
+};
+
+// This is a chunk containing CHPE code map on EC targets. It's a table
+// of address ranges and their types.
+class ECCodeMapChunk : public NonSectionChunk {
+public:
+ ECCodeMapChunk(std::vector<ECCodeMapEntry> &map) : map(map) {}
+ size_t getSize() const override;
+ void writeTo(uint8_t *buf) const override;
+
+private:
+ std::vector<ECCodeMapEntry> ↦
+};
+
// MinGW specific, for the "automatic import of variables from DLLs" feature.
// This provides the table of runtime pseudo relocations, for variable
// references that turned out to need to be imported from a DLL even though
diff --git a/lld/COFF/DLL.cpp b/lld/COFF/DLL.cpp
index 5977970104672c8..0b337a209c377db 100644
--- a/lld/COFF/DLL.cpp
+++ b/lld/COFF/DLL.cpp
@@ -318,6 +318,7 @@ class ThunkChunkX64 : public NonSectionChunk {
ThunkChunkX64(Defined *i, Chunk *tm) : imp(i), tailMerge(tm) {}
size_t getSize() const override { return sizeof(thunkX64); }
+ MachineTypes getMachine() const override { return AMD64; }
void writeTo(uint8_t *buf) const override {
memcpy(buf, thunkX64, sizeof(thunkX64));
@@ -334,6 +335,7 @@ class TailMergeChunkX64 : public NonSectionChunk {
TailMergeChunkX64(Chunk *d, Defined *h) : desc(d), helper(h) {}
size_t getSize() const override { return sizeof(tailMergeX64); }
+ MachineTypes getMachine() const override { return AMD64; }
void writeTo(uint8_t *buf) const override {
memcpy(buf, tailMergeX64, sizeof(tailMergeX64));
@@ -386,6 +388,7 @@ class ThunkChunkX86 : public NonSectionChunk {
: imp(i), tailMerge(tm), ctx(ctx) {}
size_t getSize() const override { return sizeof(thunkX86); }
+ MachineTypes getMachine() const override { return I386; }
void writeTo(uint8_t *buf) const override {
memcpy(buf, thunkX86, sizeof(thunkX86));
@@ -410,6 +413,7 @@ class TailMergeChunkX86 : public NonSectionChunk {
: desc(d), helper(h), ctx(ctx) {}
size_t getSize() const override { return sizeof(tailMergeX86); }
+ MachineTypes getMachine() const override { return I386; }
void writeTo(uint8_t *buf) const override {
memcpy(buf, tailMergeX86, sizeof(tailMergeX86));
@@ -436,6 +440,7 @@ class ThunkChunkARM : public NonSectionChunk {
}
size_t getSize() const override { return sizeof(thunkARM); }
+ MachineTypes getMachine() const override { return ARMNT; }
void writeTo(uint8_t *buf) const override {
memcpy(buf, thunkARM, sizeof(thunkARM));
@@ -462,6 +467,7 @@ class TailMergeChunkARM : public NonSectionChunk {
}
size_t getSize() const override { return sizeof(tailMergeARM); }
+ MachineTypes getMachine() const override { return ARMNT; }
void writeTo(uint8_t *buf) const override {
memcpy(buf, tailMergeARM, sizeof(tailMergeARM));
@@ -487,6 +493,7 @@ class ThunkChunkARM64 : public NonSectionChunk {
}
size_t getSize() const override { return sizeof(thunkARM64); }
+ MachineTypes getMachine() const override { return ARM64; }
void writeTo(uint8_t *buf) const override {
memcpy(buf, thunkARM64, sizeof(thunkARM64));
@@ -506,6 +513,7 @@ class TailMergeChunkARM64 : public NonSectionChunk {
}
size_t getSize() const override { return sizeof(tailMergeARM64); }
+ MachineTypes getMachine() const override { return ARM64; }
void writeTo(uint8_t *buf) const override {
memcpy(buf, tailMergeARM64, sizeof(tailMergeARM64));
diff --git a/lld/COFF/Driver.cpp b/lld/COFF/Driver.cpp
index 0fbfefdf43cf1ad..912ef92b3e8559c 100644
--- a/lld/COFF/Driver.cpp
+++ b/lld/COFF/Driver.cpp
@@ -2340,6 +2340,11 @@ void LinkerDriver::linkerMain(ArrayRef<const char *> argsArr) {
ctx.symtab.addAbsolute(mangle("__guard_eh_cont_count"), 0);
ctx.symtab.addAbsolute(mangle("__guard_eh_cont_table"), 0);
+ if (isArm64EC(config->machine)) {
+ ctx.symtab.addAbsolute("__hybrid_code_map", 0);
+ ctx.symtab.addAbsolute("__hybrid_code_map_count", 0);
+ }
+
if (config->pseudoRelocs) {
ctx.symtab.addAbsolute(mangle("__RUNTIME_PSEUDO_RELOC_LIST__"), 0);
ctx.symtab.addAbsolute(mangle("__RUNTIME_PSEUDO_RELOC_LIST_END__"), 0);
diff --git a/lld/COFF/Writer.cpp b/lld/COFF/Writer.cpp
index 4f6c2a57f533505..a3f43c14f3ab583 100644
--- a/lld/COFF/Writer.cpp
+++ b/lld/COFF/Writer.cpp
@@ -212,6 +212,7 @@ class Writer {
void locateImportTables();
void createExportTable();
void mergeSections();
+ void sortECChunks();
void removeUnusedSections();
void assignAddresses();
bool isInRange(uint16_t relType, uint64_t s, uint64_t p, int margin);
@@ -220,6 +221,7 @@ class Writer {
uint16_t type, int margin);
bool createThunks(OutputSection *os, int margin);
bool verifyRanges(const std::vector<Chunk *> chunks);
+ void createECCodeMap();
void finalizeAddresses();
void removeEmptySections();
void assignOutputSectionIndices();
@@ -228,6 +230,7 @@ class Writer {
template <typename PEHeaderTy> void writeHeader();
void createSEHTable();
void createRuntimePseudoRelocs();
+ void createECChunks();
void insertCtorDtorSymbols();
void markSymbolsWithRelocations(ObjFile *file, SymbolRVASet &usedSymbols);
void createGuardCFTables();
@@ -270,6 +273,7 @@ class Writer {
std::map<PartialSectionKey, PartialSection *> partialSections;
std::vector<char> strtab;
std::vector<llvm::object::coff_symbol16> outputSymtab;
+ std::vector<ECCodeMapEntry> codeMap;
IdataContents idata;
Chunk *importTableStart = nullptr;
uint64_t importTableSize = 0;
@@ -527,6 +531,55 @@ bool Writer::createThunks(OutputSection *os, int margin) {
return addressesChanged;
}
+// Create a code map for CHPE metadata.
+void Writer::createECCodeMap() {
+ if (!isArm64EC(ctx.config.machine))
+ return;
+
+ // Clear the map in case we were're recomputing the map after adding
+ // a range extension thunk.
+ codeMap.clear();
+
+ Chunk *first = nullptr, *last;
+ chpe_range_type lastType;
+
+ auto closeRange = [&]() {
+ if (first) {
+ codeMap.push_back({first, last, lastType});
+ first = nullptr;
+ }
+ };
+
+ for (OutputSection *sec : ctx.outputSections) {
+ if (!sec->isCodeSection()) {
+ closeRange();
+ continue;
+ }
+
+ for (Chunk *c : sec->chunks) {
+ // Skip empty section chunks. MSVC does not seem to do that and
+ // generates empty code ranges in some cases.
+ if (isa<SectionChunk>(c) && !c->getSize())
+ continue;
+
+ chpe_range_type chunkType = c->getArm64ECRangeType();
+ if (!first) {
+ first = c;
+ } else if (lastType != chunkType) {
+ closeRange();
+ first = c;
+ }
+ last = c;
+ lastType = chunkType;
+ }
+ }
+
+ closeRange();
+
+ Symbol *tableCountSym = ctx.symtab.findUnderscore("__hybrid_code_map_count");
+ cast<DefinedAbsolute>(tableCountSym)->setVA(codeMap.size());
+}
+
// Verify that all relocations are in range, with no extra margin requirements.
bool Writer::verifyRanges(const std::vector<Chunk *> chunks) {
for (Chunk *c : chunks) {
@@ -676,6 +729,7 @@ void Writer::run() {
createMiscChunks();
createExportTable();
mergeSections();
+ sortECChunks();
removeUnusedSections();
finalizeAddresses();
removeEmptySections();
@@ -1075,6 +1129,9 @@ void Writer::createMiscChunks() {
if (config->guardCF != GuardCFLevel::Off)
createGuardCFTables();
+ if (isArm64EC(config->machine))
+ createECChunks();
+
if (config->autoImport)
createRuntimePseudoRelocs();
@@ -1377,12 +1434,31 @@ void Writer::mergeSections() {
}
}
+// EC targets may have chunks of various architectures mixed together at this
+// point. Group code chunks of the same architecture together by sorting chunks
+// by their EC range type.
+void Writer::sortECChunks() {
+ if (!isArm64EC(ctx.config.machine))
+ return;
+
+ for (OutputSection *sec : ctx.outputSections) {
+ if (sec->isCodeSection())
+ llvm::stable_sort(sec->chunks, [=](const Chunk *a, const Chunk *b) {
+ return a->getArm64ECRangeType() < b->getArm64ECRangeType();
+ });
+ }
+}
+
// Visits all sections to assign incremental, non-overlapping RVAs and
// file offsets.
void Writer::assignAddresses() {
llvm::TimeTraceScope timeScope("Assign addresses");
Configuration *config = &ctx.config;
+ // We need to create EC code map so that ECCodeMapChunk knows its size.
+ // We do it here to make sure that we account for range extension chunks.
+ createECCodeMap();
+
sizeOfHeaders = dosStubSize + sizeof(PEMagic) + sizeof(coff_file_header) +
sizeof(data_directory) * numberOfDataDirectory +
sizeof(coff_section) * ctx.outputSections.size();
@@ -1403,13 +1479,20 @@ void Writer::assignAddresses() {
// If /FUNCTIONPADMIN is used, functions are padded in order to create a
// hotpatchable image.
- const bool isCodeSection =
- (sec->header.Characteristics & IMAGE_SCN_CNT_CODE) &&
- (sec->header.Characteristics & IMAGE_SCN_MEM_READ) &&
- (sec->header.Characteristics & IMAGE_SCN_MEM_EXECUTE);
- uint32_t padding = isCodeSection ? config->functionPadMin : 0;
+ uint32_t padding = sec->isCodeSection() ? config->functionPadMin : 0;
+ MachineTypes prevECMachine = IMAGE_FILE_MACHINE_UNKNOWN;
for (Chunk *c : sec->chunks) {
+ if (isArm64EC(ctx.config.machine) && sec->isCodeSection()) {
+ MachineTypes machine = c->getMachine();
+ if (machine != IMAGE_FILE_MACHINE_UNKNOWN) {
+ // We need additional alignment when crossing EC range baudaries.
+ if (prevECMachine != IMAGE_FILE_MACHINE_UNKNOWN &&
+ machine != prevECMachine)
+ virtualSize = alignTo(virtualSize, 4096);
+ prevECMachine = machine;
+ }
+ }
if (padding && c->isHotPatchable())
virtualSize += padding;
virtualSize = alignTo(virtualSize, c->getAlignment());
@@ -1913,6 +1996,15 @@ void Writer::maybeAddRVATable(SymbolRVASet tableSymbols, StringRef tableSym,
cast<DefinedAbsolute>(c)->setVA(tableChunk->getSize() / (hasFlag ? 5 : 4));
}
+// Create CHPE metadata chunks.
+void Writer::createECChunks() {
+ auto codeMapChunk = make<ECCodeMapChunk>(codeMap);
+ rdataSec->addChunk(codeMapChunk);
+ Symbol *codeMapSym = ctx.symtab.findUnderscore("__hybrid_code_map");
+ replaceSymbol<DefinedSynthetic>(codeMapSym, codeMapSym->getName(),
+ codeMapChunk);
+}
+
// MinGW specific. Gather all relocations that are imported from a DLL even
// though the code didn't expect it to, produce the table that the runtime
// uses for fixing them up, and provide the synthetic symbols that the
diff --git a/lld/COFF/Writer.h b/lld/COFF/Writer.h
index 4a74aa7ada59d73..9004bb310d07305 100644
--- a/lld/COFF/Writer.h
+++ b/lld/COFF/Writer.h
@@ -64,6 +64,12 @@ class OutputSection {
// Used only when the name is longer than 8 bytes.
void setStringTableOff(uint32_t v) { stringTableOff = v; }
+ bool isCodeSection() const {
+ return (header.Characteristics & llvm::COFF::IMAGE_SCN_CNT_CODE) &&
+ (header.Characteristics & llvm::COFF::IMAGE_SCN_MEM_READ) &&
+ (header.Characteristics & llvm::COFF::IMAGE_SCN_MEM_EXECUTE);
+ }
+
// N.B. The section index is one based.
uint32_t sectionIndex = 0;
diff --git a/lld/test/COFF/Inputs/loadconfig-arm64ec.s b/lld/test/COFF/Inputs/loadconfig-arm64ec.s
new file mode 100644
index 000000000000000..78ae594a21eff3f
--- /dev/null
+++ b/lld/test/COFF/Inputs/loadconfig-arm64ec.s
@@ -0,0 +1,97 @@
+ .section .rdata,"dr"
+ .globl _load_config_used
+ .p2align 3, 0
+_load_config_used:
+ .word 0x140
+ .fill 0x54, 1, 0
+ .xword __security_cookie
+ .fill 0x10, 1, 0
+ .xword __guard_check_icall_fptr
+ .xword __guard_dispatch_icall_fptr
+ .xword __guard_fids_table
+ .xword __guard_fids_count
+ .xword __guard_flags
+ .xword 0
+ .xword __guard_iat_table
+ .xword __guard_iat_count
+ .xword __guard_longjmp_table
+ .xword __guard_longjmp_count
+ .xword 0
+ .xword __chpe_metadata
+ .fill 0x78, 1, 0
+
+__guard_check_icall_fptr:
+ .xword 0
+__guard_dispatch_icall_fptr:
+ .xword 0
+__os_arm64x_dispatch_call_no_redirect:
+ .xword 0
+__os_arm64x_dispatch_ret:
+ .xword 0
+__os_arm64x_check_call:
+ .xword 0
+__os_arm64x_check_icall:
+ .xword 0
+__os_arm64x_get_x64_information:
+ .xword 0
+__os_arm64x_set_x64_information:
+ .xword 0
+__os_arm64x_check_icall_cfg:
+ .xword 0
+__os_arm64x_dispatch_fptr:
+ .xword 0
+__os_arm64x_helper0:
+ .xword 0
+__os_arm64x_helper1:
+ .xword 0
+__os_arm64x_helper2:
+ .xword 0
+__os_arm64x_helper3:
+ .xword 0
+__os_arm64x_helper4:
+ .xword 0
+__os_arm64x_helper5:
+ .xword 0
+__os_arm64x_helper6:
+ .xword 0
+__os_arm64x_helper7:
+ .xword 0
+__os_arm64x_helper8:
+ .xword 0
+
+ .data
+ .globl __chpe_metadata
+ .p2align 3, 0
+__chpe_metadata:
+ .word 1
+ .rva __hybrid_code_map
+ .word __hybrid_code_map_count
+ .word 0 // __x64_code_ranges_to_entry_points
+ .word 0 //__arm64x_redirection_metadata
+ .rva __os_arm64x_dispatch_call_no_redirect
+ .rva __os_arm64x_dispatch_ret
+ .rva __os_arm64x_check_call
+ .rva __os_arm64x_check_icall
+ .rva __os_arm64x_check_icall_cfg
+ .word 0 // __arm64x_native_entrypoint
+ .word 0 // __hybrid_auxiliary_iat
+ .word 0 // __x64_code_ranges_to_entry_points_count
+ .word 0 // __arm64x_redirection_metadata_count
+ .rva __os_arm64x_get_x64_information
+ .rva __os_arm64x_set_x64_information
+ .word 0 // __arm64x_extra_rfe_table
+ .word 0 // __arm64x_extra_rfe_table_size
+ .rva __os_arm64x_dispatch_fptr
+ .word 0 // __hybrid_auxiliary_iat_copy
+ .rv...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/69101
More information about the llvm-commits
mailing list