[lld] [LLD][COFF] Add support for range extension thunks for ARM64EC targets. (PR #106289)

Jacek Caban via llvm-commits llvm-commits at lists.llvm.org
Tue Aug 27 14:04:28 PDT 2024


https://github.com/cjacek created https://github.com/llvm/llvm-project/pull/106289

Thunks themselves are the same as regular ARM64 thunks; they just need to report the correct machine type. When processing the code, we also need to use the current chunk's machine type instead of the global one: we don't want to treat x86_64 thunks as ARM64EC, and we need to report the correct machine type in hybrid binaries.

>From bf1945ed0c5e75a2aaa13c0c38e23dfd9971c685 Mon Sep 17 00:00:00 2001
From: Jacek Caban <jacek at codeweavers.com>
Date: Sat, 24 Jun 2023 00:14:18 +0200
Subject: [PATCH] [LLD][COFF] Add support for range extension thunks for
 ARM64EC targets.

Thunks themselves are the same as regular ARM64 thunks; they just need to report
the correct machine type. When processing the code, we also need to use the current
chunk's machine type instead of the global one: we don't want to treat x86_64 thunks
as ARM64EC, and we need to report the correct machine type in hybrid binaries.
---
 lld/COFF/Chunks.cpp                  |   7 +-
 lld/COFF/Chunks.h                    |   9 +-
 lld/COFF/Writer.cpp                  |  38 +++---
 lld/test/COFF/arm64ec-range-thunks.s | 179 +++++++++++++++++++++++++++
 4 files changed, 207 insertions(+), 26 deletions(-)
 create mode 100644 lld/test/COFF/arm64ec-range-thunks.s

diff --git a/lld/COFF/Chunks.cpp b/lld/COFF/Chunks.cpp
index 72a9ad05ca11c1..386012e3ce8237 100644
--- a/lld/COFF/Chunks.cpp
+++ b/lld/COFF/Chunks.cpp
@@ -842,14 +842,9 @@ const uint8_t arm64Thunk[] = {
     0x00, 0x02, 0x1f, 0xd6, // br   x16
 };
 
-size_t RangeExtensionThunkARM64::getSize() const {
-  assert(ctx.config.machine == ARM64);
-  (void)&ctx;
-  return sizeof(arm64Thunk);
-}
+size_t RangeExtensionThunkARM64::getSize() const { return sizeof(arm64Thunk); }
 
 void RangeExtensionThunkARM64::writeTo(uint8_t *buf) const {
-  assert(ctx.config.machine == ARM64);
   memcpy(buf, arm64Thunk, sizeof(arm64Thunk));
   applyArm64Addr(buf + 0, target->getRVA(), rva, 12);
   applyArm64Imm(buf + 4, target->getRVA() & 0xfff, 0);
diff --git a/lld/COFF/Chunks.h b/lld/COFF/Chunks.h
index fe202008971a54..e8e647b01c74b5 100644
--- a/lld/COFF/Chunks.h
+++ b/lld/COFF/Chunks.h
@@ -617,18 +617,19 @@ class RangeExtensionThunkARM : public NonSectionCodeChunk {
 
 class RangeExtensionThunkARM64 : public NonSectionCodeChunk {
 public:
-  explicit RangeExtensionThunkARM64(COFFLinkerContext &ctx, Defined *t)
-      : target(t), ctx(ctx) {
+  explicit RangeExtensionThunkARM64(MachineTypes machine, Defined *t)
+      : target(t), machine(machine) {
     setAlignment(4);
+    assert(llvm::COFF::isAnyArm64(machine));
   }
   size_t getSize() const override;
   void writeTo(uint8_t *buf) const override;
-  MachineTypes getMachine() const override { return ARM64; }
+  MachineTypes getMachine() const override { return machine; }
 
   Defined *target;
 
 private:
-  COFFLinkerContext &ctx;
+  MachineTypes machine;
 };
 
 // Windows-specific.
diff --git a/lld/COFF/Writer.cpp b/lld/COFF/Writer.cpp
index 35e0f98926ee8a..4a0eed4d00997e 100644
--- a/lld/COFF/Writer.cpp
+++ b/lld/COFF/Writer.cpp
@@ -219,10 +219,12 @@ class Writer {
   void sortECChunks();
   void removeUnusedSections();
   void assignAddresses();
-  bool isInRange(uint16_t relType, uint64_t s, uint64_t p, int margin);
+  bool isInRange(uint16_t relType, uint64_t s, uint64_t p, int margin,
+                 MachineTypes machine);
   std::pair<Defined *, bool> getThunk(DenseMap<uint64_t, Defined *> &lastThunks,
                                       Defined *target, uint64_t p,
-                                      uint16_t type, int margin);
+                                      uint16_t type, int margin,
+                                      MachineTypes machine);
   bool createThunks(OutputSection *os, int margin);
   bool verifyRanges(const std::vector<Chunk *> chunks);
   void createECCodeMap();
@@ -396,8 +398,9 @@ void OutputSection::addContributingPartialSection(PartialSection *sec) {
 
 // Check whether the target address S is in range from a relocation
 // of type relType at address P.
-bool Writer::isInRange(uint16_t relType, uint64_t s, uint64_t p, int margin) {
-  if (ctx.config.machine == ARMNT) {
+bool Writer::isInRange(uint16_t relType, uint64_t s, uint64_t p, int margin,
+                       MachineTypes machine) {
+  if (machine == ARMNT) {
     int64_t diff = AbsoluteDifference(s, p + 4) + margin;
     switch (relType) {
     case IMAGE_REL_ARM_BRANCH20T:
@@ -408,7 +411,7 @@ bool Writer::isInRange(uint16_t relType, uint64_t s, uint64_t p, int margin) {
     default:
       return true;
     }
-  } else if (ctx.config.machine == ARM64) {
+  } else if (isAnyArm64(machine)) {
     int64_t diff = AbsoluteDifference(s, p) + margin;
     switch (relType) {
     case IMAGE_REL_ARM64_BRANCH26:
@@ -421,7 +424,7 @@ bool Writer::isInRange(uint16_t relType, uint64_t s, uint64_t p, int margin) {
       return true;
     }
   } else {
-    llvm_unreachable("Unexpected architecture");
+    return true;
   }
 }
 
@@ -429,17 +432,17 @@ bool Writer::isInRange(uint16_t relType, uint64_t s, uint64_t p, int margin) {
 // or create a new one.
 std::pair<Defined *, bool>
 Writer::getThunk(DenseMap<uint64_t, Defined *> &lastThunks, Defined *target,
-                 uint64_t p, uint16_t type, int margin) {
+                 uint64_t p, uint16_t type, int margin, MachineTypes machine) {
   Defined *&lastThunk = lastThunks[target->getRVA()];
-  if (lastThunk && isInRange(type, lastThunk->getRVA(), p, margin))
+  if (lastThunk && isInRange(type, lastThunk->getRVA(), p, margin, machine))
     return {lastThunk, false};
   Chunk *c;
-  switch (ctx.config.machine) {
-  case ARMNT:
+  switch (getMachineArchType(machine)) {
+  case Triple::thumb:
     c = make<RangeExtensionThunkARM>(ctx, target);
     break;
-  case ARM64:
-    c = make<RangeExtensionThunkARM64>(ctx, target);
+  case Triple::aarch64:
+    c = make<RangeExtensionThunkARM64>(machine, target);
     break;
   default:
     llvm_unreachable("Unexpected architecture");
@@ -471,6 +474,7 @@ bool Writer::createThunks(OutputSection *os, int margin) {
     SectionChunk *sc = dyn_cast_or_null<SectionChunk>(os->chunks[i]);
     if (!sc)
       continue;
+    MachineTypes machine = sc->getMachine();
     size_t thunkInsertionSpot = i + 1;
 
     // Try to get a good enough estimate of where new thunks will be placed.
@@ -497,11 +501,12 @@ bool Writer::createThunks(OutputSection *os, int margin) {
 
       uint64_t s = sym->getRVA();
 
-      if (isInRange(rel.Type, s, p, margin))
+      if (isInRange(rel.Type, s, p, margin, machine))
         continue;
 
       // If the target isn't in range, hook it up to an existing or new thunk.
-      auto [thunk, wasNew] = getThunk(lastThunks, sym, p, rel.Type, margin);
+      auto [thunk, wasNew] =
+          getThunk(lastThunks, sym, p, rel.Type, margin, machine);
       if (wasNew) {
         Chunk *thunkChunk = thunk->getChunk();
         thunkChunk->setRVA(
@@ -603,6 +608,7 @@ bool Writer::verifyRanges(const std::vector<Chunk *> chunks) {
     SectionChunk *sc = dyn_cast_or_null<SectionChunk>(c);
     if (!sc)
       continue;
+    MachineTypes machine = sc->getMachine();
 
     ArrayRef<coff_relocation> relocs = sc->getRelocs();
     for (const coff_relocation &rel : relocs) {
@@ -615,7 +621,7 @@ bool Writer::verifyRanges(const std::vector<Chunk *> chunks) {
       uint64_t p = sc->getRVA() + rel.VirtualAddress;
       uint64_t s = sym->getRVA();
 
-      if (!isInRange(rel.Type, s, p, 0))
+      if (!isInRange(rel.Type, s, p, 0, machine))
         return false;
     }
   }
@@ -625,7 +631,7 @@ bool Writer::verifyRanges(const std::vector<Chunk *> chunks) {
 // Assign addresses and add thunks if necessary.
 void Writer::finalizeAddresses() {
   assignAddresses();
-  if (ctx.config.machine != ARMNT && ctx.config.machine != ARM64)
+  if (ctx.config.machine != ARMNT && !isAnyArm64(ctx.config.machine))
     return;
 
   size_t origNumChunks = 0;
diff --git a/lld/test/COFF/arm64ec-range-thunks.s b/lld/test/COFF/arm64ec-range-thunks.s
new file mode 100644
index 00000000000000..a4705a5208bd01
--- /dev/null
+++ b/lld/test/COFF/arm64ec-range-thunks.s
@@ -0,0 +1,179 @@
+# REQUIRES: aarch64, x86
+# RUN: split-file %s %t.dir && cd %t.dir
+
+# RUN: llvm-mc -filetype=obj -triple=arm64ec-windows funcs.s -o funcs-arm64ec.obj
+# RUN: llvm-mc -filetype=obj -triple=aarch64-windows native-funcs.s -o funcs-aarch64.obj
+# RUN: llvm-mc -filetype=obj -triple=x86_64-windows space.s -o space-x86_64.obj
+# RUN: llvm-mc -filetype=obj -triple=aarch64-windows space.s -o space-aarch64.obj
+# RUN: llvm-mc -filetype=obj -triple=arm64ec-windows %S/Inputs/loadconfig-arm64ec.s -o loadconfig-arm64ec.obj
+
+
+# Test generating range extension thunks for ARM64EC code. Place some x86_64 chunks in the middle
+# and make sure that thunks stay in the ARM64EC code range.
+
+# RUN: lld-link -machine:arm64ec -noentry -dll funcs-arm64ec.obj space-x86_64.obj loadconfig-arm64ec.obj -out:test.dll \
+# RUN:          -verbose 2>&1 | FileCheck -check-prefix=VERBOSE %s
+# VERBOSE: Added 3 thunks with margin {{.*}} in 1 passes
+
+# RUN: llvm-objdump -d test.dll | FileCheck --check-prefix=DISASM %s
+
+# DISASM:      Disassembly of section .code1:
+# DISASM-EMPTY:
+# DISASM-NEXT: 0000000180003000 <.code1>:
+# DISASM-NEXT: 180003000: 36000040     tbz     w0, #0x0, 0x180003008 <.code1+0x8>
+# DISASM-NEXT: 180003004: d65f03c0     ret
+# DISASM-NEXT: 180003008: b0000050     adrp    x16, 0x18000c000
+# DISASM-NEXT: 18000300c: 91000210     add     x16, x16, #0x0
+# DISASM-NEXT: 180003010: d61f0200     br      x16
+# DISASM-EMPTY:
+# DISASM-NEXT: Disassembly of section .code2:
+# DISASM-EMPTY:
+# DISASM-NEXT: 0000000180004000 <.code2>:
+# DISASM-NEXT:                 ...
+# DISASM-EMPTY:
+# DISASM-NEXT: Disassembly of section .code3:
+# DISASM-EMPTY:
+# DISASM-NEXT: 0000000180005000 <.code3>:
+# DISASM-NEXT:                 ...
+# DISASM-NEXT: 18000c000: 36000060     tbz     w0, #0x0, 0x18000c00c <.code3+0x700c>
+# DISASM-NEXT: 18000c004: d65f03c0     ret
+# DISASM-NEXT: 18000c008: 00000000     udf     #0x0
+# DISASM-NEXT: 18000c00c: 90000050     adrp    x16, 0x180014000 <.code3+0xf000>
+# DISASM-NEXT: 18000c010: 91006210     add     x16, x16, #0x18
+# DISASM-NEXT: 18000c014: d61f0200     br      x16
+# DISASM-NEXT:                 ...
+# DISASM-NEXT: 180014018: 36000040     tbz     w0, #0x0, 0x180014020 <.code3+0xf020>
+# DISASM-NEXT: 18001401c: d65f03c0     ret
+# DISASM-NEXT: 180014020: f0ffff70     adrp    x16, 0x180003000 <.code1>
+# DISASM-NEXT: 180014024: 91000210     add     x16, x16, #0x0
+# DISASM-NEXT: 180014028: d61f0200     br      x16
+
+# RUN: llvm-readobj --coff-load-config test.dll | FileCheck --check-prefix=LOADCFG %s
+
+# LOADCFG:       CodeMap [
+# LOADCFG-NEXT:    0x3000 - 0x3014  ARM64EC
+# LOADCFG-NEXT:    0x4000 - 0x4300  X64
+# LOADCFG-NEXT:    0x5000 - 0x1402C  ARM64EC
+# LOADCFG-NEXT:  ]
+
+
+# A similar test using a hybrid binary and native placeholder chunks.
+
+# RUN: lld-link -machine:arm64x -noentry -dll funcs-arm64ec.obj space-aarch64.obj loadconfig-arm64ec.obj -out:testx.dll \
+# RUN:          -verbose 2>&1 | FileCheck -check-prefix=VERBOSE %s
+# RUN: llvm-objdump -d testx.dll | FileCheck --check-prefix=DISASM %s
+
+# RUN: llvm-readobj --coff-load-config testx.dll | FileCheck --check-prefix=LOADCFGX %s
+
+# LOADCFGX:       CodeMap [
+# LOADCFGX-NEXT:    0x3000 - 0x3014  ARM64EC
+# LOADCFGX-NEXT:    0x4000 - 0x4300  ARM64
+# LOADCFGX-NEXT:    0x5000 - 0x1402C  ARM64EC
+# LOADCFGX-NEXT:  ]
+
+
+# Test a hybrid ARM64X binary which requires range extension thunks for both native and EC relocations.
+
+# RUN: lld-link -machine:arm64x -noentry -dll funcs-arm64ec.obj funcs-aarch64.obj loadconfig-arm64ec.obj -out:testx2.dll \
+# RUN:          -verbose 2>&1 | FileCheck -check-prefix=VERBOSEX %s
+# VERBOSEX: Added 5 thunks with margin {{.*}} in 1 passes
+
+# RUN: llvm-objdump -d testx2.dll | FileCheck --check-prefix=DISASMX %s
+
+# DISASMX:      Disassembly of section .code1:
+# DISASMX-EMPTY:
+# DISASMX-NEXT: 0000000180003000 <.code1>:
+# DISASMX-NEXT: 180003000: 36000040     tbz     w0, #0x0, 0x180003008 <.code1+0x8>
+# DISASMX-NEXT: 180003004: d65f03c0     ret
+# DISASMX-NEXT: 180003008: b0000050     adrp    x16, 0x18000c000
+# DISASMX-NEXT: 18000300c: 91000210     add     x16, x16, #0x0
+# DISASMX-NEXT: 180003010: d61f0200     br      x16
+# DISASMX-EMPTY:
+# DISASMX-NEXT: Disassembly of section .code2:
+# DISASMX-EMPTY:
+# DISASMX-NEXT: 0000000180004000 <.code2>:
+# DISASMX-NEXT: 180004000: 36000040     tbz     w0, #0x0, 0x180004008 <.code2+0x8>
+# DISASMX-NEXT: 180004004: d65f03c0     ret
+# DISASMX-NEXT: 180004008: b0000090     adrp    x16, 0x180015000
+# DISASMX-NEXT: 18000400c: 91000210     add     x16, x16, #0x0
+# DISASMX-NEXT: 180004010: d61f0200     br      x16
+# DISASMX-EMPTY:
+# DISASMX-NEXT: Disassembly of section .code3:
+# DISASMX-EMPTY:
+# DISASMX-NEXT: 0000000180005000 <.code3>:
+# DISASMX-NEXT:                 ...
+# DISASMX-NEXT: 18000c000: 36000060     tbz     w0, #0x0, 0x18000c00c <.code3+0x700c>
+# DISASMX-NEXT: 18000c004: d65f03c0     ret
+# DISASMX-NEXT: 18000c008: 00000000     udf     #0x0
+# DISASMX-NEXT: 18000c00c: 90000050     adrp    x16, 0x180014000 <.code3+0xf000>
+# DISASMX-NEXT: 18000c010: 91006210     add     x16, x16, #0x18
+# DISASMX-NEXT: 18000c014: d61f0200     br      x16
+# DISASMX-NEXT:                 ...
+# DISASMX-NEXT: 180014018: 36000040     tbz     w0, #0x0, 0x180014020 <.code3+0xf020>
+# DISASMX-NEXT: 18001401c: d65f03c0     ret
+# DISASMX-NEXT: 180014020: f0ffff70     adrp    x16, 0x180003000 <.code1>
+# DISASMX-NEXT: 180014024: 91000210     add     x16, x16, #0x0
+# DISASMX-NEXT: 180014028: d61f0200     br      x16
+# DISASMX-EMPTY:
+# DISASMX-NEXT: Disassembly of section .code4:
+# DISASMX-EMPTY:
+# DISASMX-NEXT: 0000000180015000 <.code4>:
+# DISASMX-NEXT: 180015000: 36000040     tbz     w0, #0x0, 0x180015008 <.code4+0x8>
+# DISASMX-NEXT: 180015004: d65f03c0     ret
+# DISASMX-NEXT: 180015008: f0ffff70     adrp    x16, 0x180004000 <.code2>
+# DISASMX-NEXT: 18001500c: 91000210     add     x16, x16, #0x0
+# DISASMX-NEXT: 180015010: d61f0200     br      x16
+
+# RUN: llvm-readobj --coff-load-config testx2.dll | FileCheck --check-prefix=LOADCFGX2 %s
+
+# LOADCFGX2:       CodeMap [
+# LOADCFGX2-NEXT:    0x3000 - 0x3014  ARM64EC
+# LOADCFGX2-NEXT:    0x4000 - 0x4014  ARM64
+# LOADCFGX2-NEXT:    0x5000 - 0x1402C  ARM64EC
+# LOADCFGX2-NEXT:    0x15000 - 0x15014  ARM64
+# LOADCFGX2-NEXT:  ]
+
+
+#--- funcs.s
+        .globl main
+        .globl func1
+        .globl func2
+        .section .code1, "xr"
+main:
+        tbz w0, #0, func1
+        ret
+        .section .code3$a, "xr"
+        .space 0x7000
+        .section .code3$b, "xr"
+func1:
+        tbz w0, #0, func2
+        ret
+        .space 1
+        .section .code3$c, "xr"
+        .space 0x8000
+        .section .code3$d, "xr"
+        .align 2
+func2:
+        tbz w0, #0, main
+        ret
+
+#--- space.s
+        .section .code2$a, "xr"
+        .space 0x100
+        .section .code2$b, "xr"
+        .space 0x100
+        .section .code2$c, "xr"
+        .space 0x100
+
+#--- native-funcs.s
+        .globl nmain
+        .globl nfunc
+        .section .code2, "xr"
+nmain:
+        tbz w0, #0, nfunc
+        ret
+        .section .code4, "xr"
+        .align 2
+nfunc:
+        tbz w0, #0, nmain
+        ret



More information about the llvm-commits mailing list