[lld] [LLD][COFF] Add support for ARM64EC import call thunks with extended range (PR #109703)
Jacek Caban via llvm-commits
llvm-commits at lists.llvm.org
Mon Sep 23 11:31:09 PDT 2024
https://github.com/cjacek created https://github.com/llvm/llvm-project/pull/109703
The MSVC linker generates range extensions for these thunks when needed. This commit inlines the range extension into the thunk, making it both slightly more optimal and easier to implement in LLD.
>From e6c4fc7c28e7762346c77b1c88335528638795d0 Mon Sep 17 00:00:00 2001
From: Jacek Caban <jacek at codeweavers.com>
Date: Mon, 23 Sep 2024 20:14:37 +0200
Subject: [PATCH 1/2] [LLD][COFF][NFC] Use dyn_cast on section chunks
Instead of dyn_cast_or_null, chunk pointers are never null.
---
lld/COFF/Writer.cpp | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/lld/COFF/Writer.cpp b/lld/COFF/Writer.cpp
index c2765453aa964e..7cf723a8cf103f 100644
--- a/lld/COFF/Writer.cpp
+++ b/lld/COFF/Writer.cpp
@@ -472,7 +472,7 @@ bool Writer::createThunks(OutputSection *os, int margin) {
// Recheck Chunks.size() each iteration, since we can insert more
// elements into it.
for (size_t i = 0; i != os->chunks.size(); ++i) {
- SectionChunk *sc = dyn_cast_or_null<SectionChunk>(os->chunks[i]);
+ SectionChunk *sc = dyn_cast<SectionChunk>(os->chunks[i]);
if (!sc)
continue;
MachineTypes machine = sc->getMachine();
@@ -606,7 +606,7 @@ void Writer::createECCodeMap() {
// Verify that all relocations are in range, with no extra margin requirements.
bool Writer::verifyRanges(const std::vector<Chunk *> chunks) {
for (Chunk *c : chunks) {
- SectionChunk *sc = dyn_cast_or_null<SectionChunk>(c);
+ SectionChunk *sc = dyn_cast<SectionChunk>(c);
if (!sc)
continue;
MachineTypes machine = sc->getMachine();
@@ -872,8 +872,8 @@ bool Writer::fixGnuImportChunks() {
if (!pSec->chunks.empty())
hasIdata = true;
llvm::stable_sort(pSec->chunks, [&](Chunk *s, Chunk *t) {
- SectionChunk *sc1 = dyn_cast_or_null<SectionChunk>(s);
- SectionChunk *sc2 = dyn_cast_or_null<SectionChunk>(t);
+ SectionChunk *sc1 = dyn_cast<SectionChunk>(s);
+ SectionChunk *sc2 = dyn_cast<SectionChunk>(t);
if (!sc1 || !sc2) {
// if SC1, order them ascending. If SC2 or both null,
// S is not less than T.
>From 6d6098296d0e5df6c27a1096fddafd4e18cfa194 Mon Sep 17 00:00:00 2001
From: Jacek Caban <jacek at codeweavers.com>
Date: Mon, 23 Sep 2024 20:18:34 +0200
Subject: [PATCH 2/2] [LLD][COFF] Add support for ARM64EC import call thunks
with extended range
The MSVC linker generates range extensions for these thunks when needed.
This commit inlines the range extension into the thunk, making it both
slightly more optimal and easier to implement in LLD.
---
lld/COFF/Chunks.cpp | 31 +++++++++++++++-
lld/COFF/Chunks.h | 12 ++++++-
lld/COFF/Writer.cpp | 13 +++++--
lld/test/COFF/arm64ec-import-range-ext.test | 39 +++++++++++++++++++++
4 files changed, 91 insertions(+), 4 deletions(-)
create mode 100644 lld/test/COFF/arm64ec-import-range-ext.test
diff --git a/lld/COFF/Chunks.cpp b/lld/COFF/Chunks.cpp
index 6510c637ae8fe6..c6986681dffe77 100644
--- a/lld/COFF/Chunks.cpp
+++ b/lld/COFF/Chunks.cpp
@@ -1100,6 +1100,13 @@ void CHPERedirectionChunk::writeTo(uint8_t *buf) const {
ImportThunkChunkARM64EC::ImportThunkChunkARM64EC(ImportFile *file)
: ImportThunkChunk(file->ctx, file->impSym), file(file) {}
+size_t ImportThunkChunkARM64EC::getSize() const {
+ if (!extended)
+ return sizeof(importThunkARM64EC);
+ // The last instruction is replaced with an inline range extension thunk.
+ return sizeof(importThunkARM64EC) + sizeof(arm64Thunk) - sizeof(uint32_t);
+}
+
void ImportThunkChunkARM64EC::writeTo(uint8_t *buf) const {
memcpy(buf, importThunkARM64EC, sizeof(importThunkARM64EC));
applyArm64Addr(buf, file->impSym->getRVA(), rva, 12);
@@ -1116,7 +1123,29 @@ void ImportThunkChunkARM64EC::writeTo(uint8_t *buf) const {
applyArm64Imm(buf + 12, exitThunkRVA & 0xfff, 0);
Defined *helper = cast<Defined>(file->ctx.config.arm64ECIcallHelper);
- applyArm64Branch26(buf + 16, helper->getRVA() - rva - 16);
+ if (extended) {
+ // Replace last instruction with an inline range extension thunk.
+ memcpy(buf + 16, arm64Thunk, sizeof(arm64Thunk));
+ applyArm64Addr(buf + 16, helper->getRVA(), rva + 16, 12);
+ applyArm64Imm(buf + 20, helper->getRVA() & 0xfff, 0);
+ } else {
+ applyArm64Branch26(buf + 16, helper->getRVA() - rva - 16);
+ }
+}
+
+bool ImportThunkChunkARM64EC::verifyRanges() {
+ if (extended)
+ return true;
+ auto helper = cast<Defined>(file->ctx.config.arm64ECIcallHelper);
+ return isInt<28>(helper->getRVA() - rva - 16);
+}
+
+uint32_t ImportThunkChunkARM64EC::extendRanges() {
+ if (extended || verifyRanges())
+ return 0;
+ extended = true;
+ // The last instruction is replaced with an inline range extension thunk.
+ return sizeof(arm64Thunk) - sizeof(uint32_t);
}
} // namespace lld::coff
diff --git a/lld/COFF/Chunks.h b/lld/COFF/Chunks.h
index 04a656ae0874ec..42284f485e5c07 100644
--- a/lld/COFF/Chunks.h
+++ b/lld/COFF/Chunks.h
@@ -185,6 +185,13 @@ class NonSectionChunk : public Chunk {
// bytes, so this is used only for logging or debugging.
virtual StringRef getDebugName() const { return ""; }
+ // Verify that chunk relocations are within their ranges.
+ virtual bool verifyRanges() { return true; };
+
+ // If needed, extend the chunk to ensure all relocations are within the
+ // allowed ranges. Return the additional space required for the extension.
+ virtual uint32_t extendRanges() { return 0; };
+
static bool classof(const Chunk *c) { return c->kind() >= OtherKind; }
protected:
@@ -620,12 +627,15 @@ class ImportThunkChunkARM64 : public ImportThunkChunk {
class ImportThunkChunkARM64EC : public ImportThunkChunk {
public:
explicit ImportThunkChunkARM64EC(ImportFile *file);
- size_t getSize() const override { return sizeof(importThunkARM64EC); };
+ size_t getSize() const override;
MachineTypes getMachine() const override { return ARM64EC; }
void writeTo(uint8_t *buf) const override;
+ bool verifyRanges() override;
+ uint32_t extendRanges() override;
Defined *exitThunk;
Defined *sym = nullptr;
+ bool extended = false;
private:
ImportFile *file;
diff --git a/lld/COFF/Writer.cpp b/lld/COFF/Writer.cpp
index 7cf723a8cf103f..efab7d3e83709c 100644
--- a/lld/COFF/Writer.cpp
+++ b/lld/COFF/Writer.cpp
@@ -473,8 +473,14 @@ bool Writer::createThunks(OutputSection *os, int margin) {
// elements into it.
for (size_t i = 0; i != os->chunks.size(); ++i) {
SectionChunk *sc = dyn_cast<SectionChunk>(os->chunks[i]);
- if (!sc)
+ if (!sc) {
+ auto chunk = cast<NonSectionChunk>(os->chunks[i]);
+ if (uint32_t size = chunk->extendRanges()) {
+ thunksSize += size;
+ addressesChanged = true;
+ }
continue;
+ }
MachineTypes machine = sc->getMachine();
size_t thunkInsertionSpot = i + 1;
@@ -607,8 +613,11 @@ void Writer::createECCodeMap() {
bool Writer::verifyRanges(const std::vector<Chunk *> chunks) {
for (Chunk *c : chunks) {
SectionChunk *sc = dyn_cast<SectionChunk>(c);
- if (!sc)
+ if (!sc) {
+ if (!cast<NonSectionChunk>(c)->verifyRanges())
+ return false;
continue;
+ }
MachineTypes machine = sc->getMachine();
ArrayRef<coff_relocation> relocs = sc->getRelocs();
diff --git a/lld/test/COFF/arm64ec-import-range-ext.test b/lld/test/COFF/arm64ec-import-range-ext.test
new file mode 100644
index 00000000000000..701d4c11cc5646
--- /dev/null
+++ b/lld/test/COFF/arm64ec-import-range-ext.test
@@ -0,0 +1,39 @@
+REQUIRES: aarch64, x86
+RUN: split-file %s %t.dir && cd %t.dir
+
+RUN: llvm-mc -filetype=obj -triple=arm64ec-windows test.s -o test.obj
+RUN: llvm-mc -filetype=obj -triple=arm64ec-windows %S/Inputs/loadconfig-arm64ec.s -o loadconfig-arm64ec.obj
+RUN: llvm-lib -machine:arm64ec -def:test.def -out:test.lib
+
+RUN: lld-link -machine:arm64ec -dll -noentry -out:out.dll loadconfig-arm64ec.obj test.obj test.lib
+
+RUN: llvm-objdump -d out.dll | FileCheck --check-prefix=DISASM %s
+DISASM: 0000000180001000 <.text>:
+DISASM-NEXT: 180001000: 52800000 mov w0, #0x0 // =0
+DISASM-NEXT: 180001004: d65f03c0 ret
+DISASM-NEXT: ...
+DISASM-NEXT: 188001008: b000000b adrp x11, 0x188002000
+DISASM-NEXT: 18800100c: f940016b ldr x11, [x11]
+DISASM-NEXT: 188001010: f0fbffea adrp x10, 0x180000000
+DISASM-NEXT: 188001014: 9100014a add x10, x10, #0x0
+DISASM-NEXT: 188001018: 90fc0010 adrp x16, 0x180001000 <.text>
+DISASM-NEXT: 18800101c: 91000210 add x16, x16, #0x0
+DISASM-NEXT: 188001020: d61f0200 br x16
+
+#--- test.s
+ .text
+ .globl __icall_helper_arm64ec
+ .p2align 2, 0x0
+__icall_helper_arm64ec:
+ mov w0, #0
+ ret
+
+ .space 0x8000000
+
+ .data
+ .rva __imp_func
+
+#--- test.def
+NAME test.dll
+EXPORTS
+ func
More information about the llvm-commits
mailing list