[lld] [lld-macho] Include branch extension thunks in linker map (PR #120496)

via llvm-commits llvm-commits at lists.llvm.org
Wed Dec 18 15:55:18 PST 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-lld

Author: alx32

<details>
<summary>Changes</summary>

This patch extends the MachO linker's map file generation to include branch extension thunk symbols. Previously, thunks were omitted from the map file, making it difficult to understand the final layout of the binary, especially when debugging issues related to long branch thunks. This change ensures thunks are included and correctly interleaved with other symbols based on their address, providing an accurate representation of the linked output.

---
Full diff: https://github.com/llvm/llvm-project/pull/120496.diff


4 Files Affected:

- (modified) lld/MachO/ConcatOutputSection.h (+11-4) 
- (modified) lld/MachO/MapFile.cpp (+28-1) 
- (modified) lld/MachO/OutputSection.h (+1) 
- (modified) lld/test/MachO/arm64-thunks.s (+25-1) 


``````````diff
diff --git a/lld/MachO/ConcatOutputSection.h b/lld/MachO/ConcatOutputSection.h
index 9af661d0ab1e0c..c2449e82b73ad1 100644
--- a/lld/MachO/ConcatOutputSection.h
+++ b/lld/MachO/ConcatOutputSection.h
@@ -25,8 +25,9 @@ class Defined;
 // in the final binary.
 class ConcatOutputSection : public OutputSection {
 public:
-  explicit ConcatOutputSection(StringRef name)
-      : OutputSection(ConcatKind, name) {}
+  explicit ConcatOutputSection(StringRef name,
+                               OutputSection::Kind kind = ConcatKind)
+      : OutputSection(kind, name) {}
 
   const ConcatInputSection *firstSection() const { return inputs.front(); }
   const ConcatInputSection *lastSection() const { return inputs.back(); }
@@ -46,7 +47,7 @@ class ConcatOutputSection : public OutputSection {
   void writeTo(uint8_t *buf) const override;
 
   static bool classof(const OutputSection *sec) {
-    return sec->kind() == ConcatKind;
+    return sec->kind() == ConcatKind || sec->kind() == TextKind;
   }
 
   static ConcatOutputSection *getOrCreateForInput(const InputSection *);
@@ -66,12 +67,18 @@ class ConcatOutputSection : public OutputSection {
 // support thunk insertion.
 class TextOutputSection : public ConcatOutputSection {
 public:
-  explicit TextOutputSection(StringRef name) : ConcatOutputSection(name) {}
+  explicit TextOutputSection(StringRef name)
+      : ConcatOutputSection(name, TextKind) {}
   void finalizeContents() override {}
   void finalize() override;
   bool needsThunks() const;
+  const std::vector<ConcatInputSection *> &getThunks() const { return thunks; }
   void writeTo(uint8_t *buf) const override;
 
+  static bool classof(const OutputSection *sec) {
+    return sec->kind() == TextKind;
+  }
+
 private:
   uint64_t estimateStubsInRangeVA(size_t callIdx) const;
 
diff --git a/lld/MachO/MapFile.cpp b/lld/MachO/MapFile.cpp
index 9c0621622ae2f0..6fdea64cbefa04 100644
--- a/lld/MachO/MapFile.cpp
+++ b/lld/MachO/MapFile.cpp
@@ -161,6 +161,28 @@ static uint64_t getSymSizeForMap(Defined *sym) {
   return sym->size;
 }
 
+// Merges two vectors of input sections, each already sorted by outSecOff, into
+// one sorted vector (ties keep inputs1 first). This allocates a temporary
+// vector, which is not ideal, but avoiding it would duplicate a lot of code.
+static std::vector<ConcatInputSection *>
+mergeOrderedInputs(const std::vector<ConcatInputSection *> &inputs1,
+                   const std::vector<ConcatInputSection *> &inputs2) {
+  std::vector<ConcatInputSection *> vec;
+  size_t i = 0, ie = inputs1.size();
+  size_t t = 0, te = inputs2.size();
+  while (i < ie || t < te) {
+    while (i < ie &&
+           (t == te || inputs1[i]->outSecOff <= inputs2[t]->outSecOff)) {
+      vec.push_back(inputs1[i++]);
+    }
+    while (t < te &&
+           (i == ie || inputs2[t]->outSecOff < inputs1[i]->outSecOff)) {
+      vec.push_back(inputs2[t++]);
+    }
+  }
+  return vec;
+}
+
 void macho::writeMapFile() {
   if (config->mapFile.empty())
     return;
@@ -220,7 +242,12 @@ void macho::writeMapFile() {
   os << "# Address\tSize    \tFile  Name\n";
   for (const OutputSegment *seg : outputSegments) {
     for (const OutputSection *osec : seg->getSections()) {
-      if (auto *concatOsec = dyn_cast<ConcatOutputSection>(osec)) {
+      const TextOutputSection *textOsec = dyn_cast<TextOutputSection>(osec);
+      if (textOsec && textOsec->getThunks().size()) {
+        auto inputsAndThunks =
+            mergeOrderedInputs(textOsec->inputs, textOsec->getThunks());
+        printIsecArrSyms(inputsAndThunks);
+      } else if (auto *concatOsec = dyn_cast<ConcatOutputSection>(osec)) {
         printIsecArrSyms(concatOsec->inputs);
       } else if (osec == in.cStringSection || osec == in.objcMethnameSection) {
         const auto &liveCStrings = info.liveCStringsForSection.lookup(osec);
diff --git a/lld/MachO/OutputSection.h b/lld/MachO/OutputSection.h
index 5297a03c2cfa7f..9afd3a9eeb1928 100644
--- a/lld/MachO/OutputSection.h
+++ b/lld/MachO/OutputSection.h
@@ -37,6 +37,7 @@ class OutputSection {
   enum Kind {
     ConcatKind,
     SyntheticKind,
+    TextKind,
   };
 
   OutputSection(Kind kind, StringRef name) : name(name), sectionKind(kind) {}
diff --git a/lld/test/MachO/arm64-thunks.s b/lld/test/MachO/arm64-thunks.s
index d887359bbc23e1..8d7bb154379bc4 100644
--- a/lld/test/MachO/arm64-thunks.s
+++ b/lld/test/MachO/arm64-thunks.s
@@ -8,14 +8,38 @@
 ## (4) early calls to a dylib stub use a thunk, and later calls the stub
 ##     directly
 ## (5) Thunks are created for all sections in the text segment with branches.
+## (6) Thunks are in the linker map file.
 ## Notes:
 ## 0x4000000 = 64 Mi = half the magnitude of the forward-branch range
 
 # RUN: rm -rf %t; mkdir %t
 # RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %s -o %t/input.o
-# RUN: %lld -arch arm64 -dead_strip -lSystem -U _extern_sym -o %t/thunk %t/input.o
+# RUN: %lld -arch arm64 -dead_strip -lSystem -U _extern_sym -map %t/thunk.map -o %t/thunk %t/input.o
 # RUN: llvm-objdump --no-print-imm-hex -d --no-show-raw-insn %t/thunk | FileCheck %s
 
+## Check that the thunks appear in the map file and that everything is sorted by address
+# MAP: [[0x[0-9A-Fa-f]+]] 0x[0-9A-Fa-f]+ \[[0-9]+\] _b
+# MAP: [[0x[0-9A-Fa-f]+]] 0x[0-9A-Fa-f]+ \[[0-9]+\] _c
+# MAP: [[0x[0-9A-Fa-f]+]] 0x[0-9A-Fa-f]+ \[[0-9]+\] _d.thunk.0
+# MAP: [[0x[0-9A-Fa-f]+]] 0x[0-9A-Fa-f]+ \[[0-9]+\] _e.thunk.0
+# MAP: [[0x[0-9A-Fa-f]+]] 0x[0-9A-Fa-f]+ \[[0-9]+\] _f.thunk.0
+# MAP: [[0x[0-9A-Fa-f]+]] 0x[0-9A-Fa-f]+ \[[0-9]+\] _g.thunk.0
+# MAP: [[0x[0-9A-Fa-f]+]] 0x[0-9A-Fa-f]+ \[[0-9]+\] _h.thunk.0
+# MAP: [[0x[0-9A-Fa-f]+]] 0x[0-9A-Fa-f]+ \[[0-9]+\] ___nan.thunk.0
+# MAP: [[0x[0-9A-Fa-f]+]] 0x[0-9A-Fa-f]+ \[[0-9]+\] _d
+# MAP: [[0x[0-9A-Fa-f]+]] 0x[0-9A-Fa-f]+ \[[0-9]+\] _e
+# MAP: [[0x[0-9A-Fa-f]+]] 0x[0-9A-Fa-f]+ \[[0-9]+\] _f
+# MAP: [[0x[0-9A-Fa-f]+]] 0x[0-9A-Fa-f]+ \[[0-9]+\] _g
+# MAP: [[0x[0-9A-Fa-f]+]] 0x[0-9A-Fa-f]+ \[[0-9]+\] _a.thunk.0
+# MAP: [[0x[0-9A-Fa-f]+]] 0x[0-9A-Fa-f]+ \[[0-9]+\] _b.thunk.0
+# MAP: [[0x[0-9A-Fa-f]+]] 0x[0-9A-Fa-f]+ \[[0-9]+\] _h
+# MAP: [[0x[0-9A-Fa-f]+]] 0x[0-9A-Fa-f]+ \[[0-9]+\] _main
+# MAP: [[0x[0-9A-Fa-f]+]] 0x[0-9A-Fa-f]+ \[[0-9]+\] _c.thunk.0
+# MAP: [[0x[0-9A-Fa-f]+]] 0x[0-9A-Fa-f]+ \[[0-9]+\] _d.thunk.1
+# MAP: [[0x[0-9A-Fa-f]+]] 0x[0-9A-Fa-f]+ \[[0-9]+\] _e.thunk.1
+# MAP: [[0x[0-9A-Fa-f]+]] 0x[0-9A-Fa-f]+ \[[0-9]+\] _f.thunk.1
+# MAP: [[0x[0-9A-Fa-f]+]] 0x[0-9A-Fa-f]+ \[[0-9]+\] _z
+
 # CHECK: Disassembly of section __TEXT,__text:
 
 # CHECK: [[#%.13x, A_PAGE:]][[#%.3x, A_OFFSET:]] <_a>:

``````````

</details>


https://github.com/llvm/llvm-project/pull/120496


More information about the llvm-commits mailing list