[lld] [lld-macho] Include branch extension thunks in linker map (PR #120496)

via llvm-commits llvm-commits at lists.llvm.org
Wed Dec 18 15:54:27 PST 2024


https://github.com/alx32 updated https://github.com/llvm/llvm-project/pull/120496

>From 96693a15c979078b74d919d1e3e7a8301af348a8 Mon Sep 17 00:00:00 2001
From: Alex B <alexborcan at meta.com>
Date: Wed, 18 Dec 2024 15:41:20 -0800
Subject: [PATCH 1/2] [lld-macho] Include branch extension thunks in linker map

This patch extends the MachO linker's map file generation to include branch extension thunk symbols. Previously, thunks were omitted from the map file, making it difficult to understand the final layout of the binary, especially when debugging issues related to long branch thunks. This change ensures thunks are included and correctly interleaved with other symbols based on their address, providing an accurate representation of the linked output.
---
 lld/MachO/ConcatOutputSection.h | 15 +++++++++++----
 lld/MachO/MapFile.cpp           | 29 ++++++++++++++++++++++++++++-
 lld/MachO/OutputSection.h       |  1 +
 lld/test/MachO/arm64-thunks.s   | 26 +++++++++++++++++++++++++-
 4 files changed, 65 insertions(+), 6 deletions(-)

diff --git a/lld/MachO/ConcatOutputSection.h b/lld/MachO/ConcatOutputSection.h
index 9af661d0ab1e0c..c2449e82b73ad1 100644
--- a/lld/MachO/ConcatOutputSection.h
+++ b/lld/MachO/ConcatOutputSection.h
@@ -25,8 +25,9 @@ class Defined;
 // in the final binary.
 class ConcatOutputSection : public OutputSection {
 public:
-  explicit ConcatOutputSection(StringRef name)
-      : OutputSection(ConcatKind, name) {}
+  explicit ConcatOutputSection(StringRef name,
+                               OutputSection::Kind kind = ConcatKind)
+      : OutputSection(kind, name) {}
 
   const ConcatInputSection *firstSection() const { return inputs.front(); }
   const ConcatInputSection *lastSection() const { return inputs.back(); }
@@ -46,7 +47,7 @@ class ConcatOutputSection : public OutputSection {
   void writeTo(uint8_t *buf) const override;
 
   static bool classof(const OutputSection *sec) {
-    return sec->kind() == ConcatKind;
+    return sec->kind() == ConcatKind || sec->kind() == TextKind;
   }
 
   static ConcatOutputSection *getOrCreateForInput(const InputSection *);
@@ -66,12 +67,18 @@ class ConcatOutputSection : public OutputSection {
 // support thunk insertion.
 class TextOutputSection : public ConcatOutputSection {
 public:
-  explicit TextOutputSection(StringRef name) : ConcatOutputSection(name) {}
+  explicit TextOutputSection(StringRef name)
+      : ConcatOutputSection(name, TextKind) {}
   void finalizeContents() override {}
   void finalize() override;
   bool needsThunks() const;
+  const std::vector<ConcatInputSection *> &getThunks() const { return thunks; }
   void writeTo(uint8_t *buf) const override;
 
+  static bool classof(const OutputSection *sec) {
+    return sec->kind() == TextKind;
+  }
+
 private:
   uint64_t estimateStubsInRangeVA(size_t callIdx) const;
 
diff --git a/lld/MachO/MapFile.cpp b/lld/MachO/MapFile.cpp
index 9c0621622ae2f0..6fdea64cbefa04 100644
--- a/lld/MachO/MapFile.cpp
+++ b/lld/MachO/MapFile.cpp
@@ -161,6 +161,28 @@ static uint64_t getSymSizeForMap(Defined *sym) {
   return sym->size;
 }
 
+// Merges two vectors of input sections (each assumed sorted by outSecOff)
+// into a new vector ordered by outSecOff; on ties, inputs1 entries go first.
+// The temporary copy is not ideal, but avoiding it duplicates a lot of code.
+static std::vector<ConcatInputSection *>
+mergeOrderedInputs(const std::vector<ConcatInputSection *> &inputs1,
+                   const std::vector<ConcatInputSection *> &inputs2) {
+  std::vector<ConcatInputSection *> vec;
+  size_t i = 0, ie = inputs1.size(); // cursor and end index for inputs1
+  size_t t = 0, te = inputs2.size(); // cursor and end index for inputs2
+  while (i < ie || t < te) {
+    while (i < ie &&
+           (t == te || inputs1[i]->outSecOff <= inputs2[t]->outSecOff)) {
+      vec.push_back(inputs1[i++]);
+    }
+    while (t < te &&
+           (i == ie || inputs2[t]->outSecOff < inputs1[i]->outSecOff)) {
+      vec.push_back(inputs2[t++]);
+    }
+  }
+  return vec;
+}
+
 void macho::writeMapFile() {
   if (config->mapFile.empty())
     return;
@@ -220,7 +242,12 @@ void macho::writeMapFile() {
   os << "# Address\tSize    \tFile  Name\n";
   for (const OutputSegment *seg : outputSegments) {
     for (const OutputSection *osec : seg->getSections()) {
-      if (auto *concatOsec = dyn_cast<ConcatOutputSection>(osec)) {
+      const TextOutputSection *textOsec = dyn_cast<TextOutputSection>(osec);
+      if (textOsec && textOsec->getThunks().size()) {
+        auto inputsAndThunks =
+            mergeOrderedInputs(textOsec->inputs, textOsec->getThunks());
+        printIsecArrSyms(inputsAndThunks);
+      } else if (auto *concatOsec = dyn_cast<ConcatOutputSection>(osec)) {
         printIsecArrSyms(concatOsec->inputs);
       } else if (osec == in.cStringSection || osec == in.objcMethnameSection) {
         const auto &liveCStrings = info.liveCStringsForSection.lookup(osec);
diff --git a/lld/MachO/OutputSection.h b/lld/MachO/OutputSection.h
index 5297a03c2cfa7f..9afd3a9eeb1928 100644
--- a/lld/MachO/OutputSection.h
+++ b/lld/MachO/OutputSection.h
@@ -37,6 +37,7 @@ class OutputSection {
   enum Kind {
     ConcatKind,
     SyntheticKind,
+    TextKind,
   };
 
   OutputSection(Kind kind, StringRef name) : name(name), sectionKind(kind) {}
diff --git a/lld/test/MachO/arm64-thunks.s b/lld/test/MachO/arm64-thunks.s
index d887359bbc23e1..b3aa8f037ecfe7 100644
--- a/lld/test/MachO/arm64-thunks.s
+++ b/lld/test/MachO/arm64-thunks.s
@@ -8,14 +8,38 @@
 ## (4) early calls to a dylib stub use a thunk, and later calls the stub
 ##     directly
 ## (5) Thunks are created for all sections in the text segment with branches.
+## (6) Thunks are in the linker map file.
 ## Notes:
 ## 0x4000000 = 64 Mi = half the magnitude of the forward-branch range
 
 # RUN: rm -rf %t; mkdir %t
 # RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %s -o %t/input.o
-# RUN: %lld -arch arm64 -dead_strip -lSystem -U _extern_sym -o %t/thunk %t/input.o
+# RUN: %lld -arch arm64 -dead_strip -lSystem -U _extern_sym -map %t/thunk.map -o %t/thunk %t/input.o
 # RUN: llvm-objdump --no-print-imm-hex -d --no-show-raw-insn %t/thunk | FileCheck %s
 
+## Check that the thunks appear in the map file and that everything is sorted by address
+# MAP: [[ADDR2:0x[0-9A-Fa-f]+]] 0x[0-9A-Fa-f]+ \[[0-9]+\] _b [[#ADDR2 > #ADDR1]]
+# MAP: [[ADDR3:0x[0-9A-Fa-f]+]] 0x[0-9A-Fa-f]+ \[[0-9]+\] _c [[#ADDR3 > #ADDR2]]
+# MAP: [[ADDR4:0x[0-9A-Fa-f]+]] 0x[0-9A-Fa-f]+ \[[0-9]+\] _d.thunk.0 [[#ADDR4 > #ADDR3]]
+# MAP: [[ADDR5:0x[0-9A-Fa-f]+]] 0x[0-9A-Fa-f]+ \[[0-9]+\] _e.thunk.0 [[#ADDR5 > #ADDR4]]
+# MAP: [[ADDR6:0x[0-9A-Fa-f]+]] 0x[0-9A-Fa-f]+ \[[0-9]+\] _f.thunk.0 [[#ADDR6 > #ADDR5]]
+# MAP: [[ADDR7:0x[0-9A-Fa-f]+]] 0x[0-9A-Fa-f]+ \[[0-9]+\] _g.thunk.0 [[#ADDR7 > #ADDR6]]
+# MAP: [[ADDR8:0x[0-9A-Fa-f]+]] 0x[0-9A-Fa-f]+ \[[0-9]+\] _h.thunk.0 [[#ADDR8 > #ADDR7]]
+# MAP: [[ADDR9:0x[0-9A-Fa-f]+]] 0x[0-9A-Fa-f]+ \[[0-9]+\] ___nan.thunk.0 [[#ADDR9 > #ADDR8]]
+# MAP: [[ADDR10:0x[0-9A-Fa-f]+]] 0x[0-9A-Fa-f]+ \[[0-9]+\] _d [[#ADDR10 > #ADDR9]]
+# MAP: [[ADDR11:0x[0-9A-Fa-f]+]] 0x[0-9A-Fa-f]+ \[[0-9]+\] _e [[#ADDR11 > #ADDR10]]
+# MAP: [[ADDR12:0x[0-9A-Fa-f]+]] 0x[0-9A-Fa-f]+ \[[0-9]+\] _f [[#ADDR12 > #ADDR11]]
+# MAP: [[ADDR13:0x[0-9A-Fa-f]+]] 0x[0-9A-Fa-f]+ \[[0-9]+\] _g [[#ADDR13 > #ADDR12]]
+# MAP: [[ADDR14:0x[0-9A-Fa-f]+]] 0x[0-9A-Fa-f]+ \[[0-9]+\] _a.thunk.0 [[#ADDR14 > #ADDR13]]
+# MAP: [[ADDR15:0x[0-9A-Fa-f]+]] 0x[0-9A-Fa-f]+ \[[0-9]+\] _b.thunk.0 [[#ADDR15 > #ADDR14]]
+# MAP: [[ADDR16:0x[0-9A-Fa-f]+]] 0x[0-9A-Fa-f]+ \[[0-9]+\] _h [[#ADDR16 > #ADDR15]]
+# MAP: [[ADDR17:0x[0-9A-Fa-f]+]] 0x[0-9A-Fa-f]+ \[[0-9]+\] _main [[#ADDR17 > #ADDR16]]
+# MAP: [[ADDR18:0x[0-9A-Fa-f]+]] 0x[0-9A-Fa-f]+ \[[0-9]+\] _c.thunk.0 [[#ADDR18 > #ADDR17]]
+# MAP: [[ADDR19:0x[0-9A-Fa-f]+]] 0x[0-9A-Fa-f]+ \[[0-9]+\] _d.thunk.1 [[#ADDR19 > #ADDR18]]
+# MAP: [[ADDR20:0x[0-9A-Fa-f]+]] 0x[0-9A-Fa-f]+ \[[0-9]+\] _e.thunk.1 [[#ADDR20 > #ADDR19]]
+# MAP: [[ADDR21:0x[0-9A-Fa-f]+]] 0x[0-9A-Fa-f]+ \[[0-9]+\] _f.thunk.1 [[#ADDR21 > #ADDR20]]
+# MAP: [[ADDR22:0x[0-9A-Fa-f]+]] 0x[0-9A-Fa-f]+ \[[0-9]+\] _z [[#ADDR22 > #ADDR21]]
+
 # CHECK: Disassembly of section __TEXT,__text:
 
 # CHECK: [[#%.13x, A_PAGE:]][[#%.3x, A_OFFSET:]] <_a>:

>From 9537ec3a38ec743e21c3dbaece87e17891a377c8 Mon Sep 17 00:00:00 2001
From: Alex B <alexborcan at meta.com>
Date: Wed, 18 Dec 2024 15:54:15 -0800
Subject: [PATCH 2/2] Remove unnecessary variables from test file

---
 lld/test/MachO/arm64-thunks.s | 42 +++++++++++++++++------------------
 1 file changed, 21 insertions(+), 21 deletions(-)

diff --git a/lld/test/MachO/arm64-thunks.s b/lld/test/MachO/arm64-thunks.s
index b3aa8f037ecfe7..8d7bb154379bc4 100644
--- a/lld/test/MachO/arm64-thunks.s
+++ b/lld/test/MachO/arm64-thunks.s
@@ -18,27 +18,27 @@
 # RUN: llvm-objdump --no-print-imm-hex -d --no-show-raw-insn %t/thunk | FileCheck %s
 
 ## Check that the thunks appear in the map file and that everything is sorted by address
-# MAP: [[ADDR2:0x[0-9A-Fa-f]+]] 0x[0-9A-Fa-f]+ \[[0-9]+\] _b [[#ADDR2 > #ADDR1]]
-# MAP: [[ADDR3:0x[0-9A-Fa-f]+]] 0x[0-9A-Fa-f]+ \[[0-9]+\] _c [[#ADDR3 > #ADDR2]]
-# MAP: [[ADDR4:0x[0-9A-Fa-f]+]] 0x[0-9A-Fa-f]+ \[[0-9]+\] _d.thunk.0 [[#ADDR4 > #ADDR3]]
-# MAP: [[ADDR5:0x[0-9A-Fa-f]+]] 0x[0-9A-Fa-f]+ \[[0-9]+\] _e.thunk.0 [[#ADDR5 > #ADDR4]]
-# MAP: [[ADDR6:0x[0-9A-Fa-f]+]] 0x[0-9A-Fa-f]+ \[[0-9]+\] _f.thunk.0 [[#ADDR6 > #ADDR5]]
-# MAP: [[ADDR7:0x[0-9A-Fa-f]+]] 0x[0-9A-Fa-f]+ \[[0-9]+\] _g.thunk.0 [[#ADDR7 > #ADDR6]]
-# MAP: [[ADDR8:0x[0-9A-Fa-f]+]] 0x[0-9A-Fa-f]+ \[[0-9]+\] _h.thunk.0 [[#ADDR8 > #ADDR7]]
-# MAP: [[ADDR9:0x[0-9A-Fa-f]+]] 0x[0-9A-Fa-f]+ \[[0-9]+\] ___nan.thunk.0 [[#ADDR9 > #ADDR8]]
-# MAP: [[ADDR10:0x[0-9A-Fa-f]+]] 0x[0-9A-Fa-f]+ \[[0-9]+\] _d [[#ADDR10 > #ADDR9]]
-# MAP: [[ADDR11:0x[0-9A-Fa-f]+]] 0x[0-9A-Fa-f]+ \[[0-9]+\] _e [[#ADDR11 > #ADDR10]]
-# MAP: [[ADDR12:0x[0-9A-Fa-f]+]] 0x[0-9A-Fa-f]+ \[[0-9]+\] _f [[#ADDR12 > #ADDR11]]
-# MAP: [[ADDR13:0x[0-9A-Fa-f]+]] 0x[0-9A-Fa-f]+ \[[0-9]+\] _g [[#ADDR13 > #ADDR12]]
-# MAP: [[ADDR14:0x[0-9A-Fa-f]+]] 0x[0-9A-Fa-f]+ \[[0-9]+\] _a.thunk.0 [[#ADDR14 > #ADDR13]]
-# MAP: [[ADDR15:0x[0-9A-Fa-f]+]] 0x[0-9A-Fa-f]+ \[[0-9]+\] _b.thunk.0 [[#ADDR15 > #ADDR14]]
-# MAP: [[ADDR16:0x[0-9A-Fa-f]+]] 0x[0-9A-Fa-f]+ \[[0-9]+\] _h [[#ADDR16 > #ADDR15]]
-# MAP: [[ADDR17:0x[0-9A-Fa-f]+]] 0x[0-9A-Fa-f]+ \[[0-9]+\] _main [[#ADDR17 > #ADDR16]]
-# MAP: [[ADDR18:0x[0-9A-Fa-f]+]] 0x[0-9A-Fa-f]+ \[[0-9]+\] _c.thunk.0 [[#ADDR18 > #ADDR17]]
-# MAP: [[ADDR19:0x[0-9A-Fa-f]+]] 0x[0-9A-Fa-f]+ \[[0-9]+\] _d.thunk.1 [[#ADDR19 > #ADDR18]]
-# MAP: [[ADDR20:0x[0-9A-Fa-f]+]] 0x[0-9A-Fa-f]+ \[[0-9]+\] _e.thunk.1 [[#ADDR20 > #ADDR19]]
-# MAP: [[ADDR21:0x[0-9A-Fa-f]+]] 0x[0-9A-Fa-f]+ \[[0-9]+\] _f.thunk.1 [[#ADDR21 > #ADDR20]]
-# MAP: [[ADDR22:0x[0-9A-Fa-f]+]] 0x[0-9A-Fa-f]+ \[[0-9]+\] _z [[#ADDR22 > #ADDR21]]
+# MAP: 0x{{[0-9A-Fa-f]+}} 0x{{[0-9A-Fa-f]+}} {{\[ *[0-9]+\]}} _b
+# MAP: 0x{{[0-9A-Fa-f]+}} 0x{{[0-9A-Fa-f]+}} {{\[ *[0-9]+\]}} _c
+# MAP: 0x{{[0-9A-Fa-f]+}} 0x{{[0-9A-Fa-f]+}} {{\[ *[0-9]+\]}} _d.thunk.0
+# MAP: 0x{{[0-9A-Fa-f]+}} 0x{{[0-9A-Fa-f]+}} {{\[ *[0-9]+\]}} _e.thunk.0
+# MAP: 0x{{[0-9A-Fa-f]+}} 0x{{[0-9A-Fa-f]+}} {{\[ *[0-9]+\]}} _f.thunk.0
+# MAP: 0x{{[0-9A-Fa-f]+}} 0x{{[0-9A-Fa-f]+}} {{\[ *[0-9]+\]}} _g.thunk.0
+# MAP: 0x{{[0-9A-Fa-f]+}} 0x{{[0-9A-Fa-f]+}} {{\[ *[0-9]+\]}} _h.thunk.0
+# MAP: 0x{{[0-9A-Fa-f]+}} 0x{{[0-9A-Fa-f]+}} {{\[ *[0-9]+\]}} ___nan.thunk.0
+# MAP: 0x{{[0-9A-Fa-f]+}} 0x{{[0-9A-Fa-f]+}} {{\[ *[0-9]+\]}} _d
+# MAP: 0x{{[0-9A-Fa-f]+}} 0x{{[0-9A-Fa-f]+}} {{\[ *[0-9]+\]}} _e
+# MAP: 0x{{[0-9A-Fa-f]+}} 0x{{[0-9A-Fa-f]+}} {{\[ *[0-9]+\]}} _f
+# MAP: 0x{{[0-9A-Fa-f]+}} 0x{{[0-9A-Fa-f]+}} {{\[ *[0-9]+\]}} _g
+# MAP: 0x{{[0-9A-Fa-f]+}} 0x{{[0-9A-Fa-f]+}} {{\[ *[0-9]+\]}} _a.thunk.0
+# MAP: 0x{{[0-9A-Fa-f]+}} 0x{{[0-9A-Fa-f]+}} {{\[ *[0-9]+\]}} _b.thunk.0
+# MAP: 0x{{[0-9A-Fa-f]+}} 0x{{[0-9A-Fa-f]+}} {{\[ *[0-9]+\]}} _h
+# MAP: 0x{{[0-9A-Fa-f]+}} 0x{{[0-9A-Fa-f]+}} {{\[ *[0-9]+\]}} _main
+# MAP: 0x{{[0-9A-Fa-f]+}} 0x{{[0-9A-Fa-f]+}} {{\[ *[0-9]+\]}} _c.thunk.0
+# MAP: 0x{{[0-9A-Fa-f]+}} 0x{{[0-9A-Fa-f]+}} {{\[ *[0-9]+\]}} _d.thunk.1
+# MAP: 0x{{[0-9A-Fa-f]+}} 0x{{[0-9A-Fa-f]+}} {{\[ *[0-9]+\]}} _e.thunk.1
+# MAP: 0x{{[0-9A-Fa-f]+}} 0x{{[0-9A-Fa-f]+}} {{\[ *[0-9]+\]}} _f.thunk.1
+# MAP: 0x{{[0-9A-Fa-f]+}} 0x{{[0-9A-Fa-f]+}} {{\[ *[0-9]+\]}} _z
 
 # CHECK: Disassembly of section __TEXT,__text:
 



More information about the llvm-commits mailing list