[lld] [llvm] [lld-macho] Fix thunks for non-__text TEXT sections (PR #99052)

via llvm-commits llvm-commits at lists.llvm.org
Tue Jul 16 07:55:23 PDT 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-lld

Author: Leonard Grey (speednoisemovement)

<details>
<summary>Changes</summary>

This supersedes https://github.com/llvm/llvm-project/pull/87818 and fixes https://github.com/llvm/llvm-project/issues/52767

When calculating arm64 thunks, we make a few assumptions that may not hold when considering code sections outside of `__text`:

1. That a section needs thunks only if its size is larger than the branch range.
2. That any calls into `__stubs` are necessarily forward jumps (that is, the section with the jump is ordered before `__stubs`)

Sections like this exist in the wild, most prominently the `__lcxx_overrides` section introduced in https://github.com/llvm/llvm-project/pull/69498

This change:
- Ensures that if one section in `__TEXT` gets thunks, all of them do.
- Makes all code sections in `__TEXT` contiguous (and guaranteed to be placed before `__stubs`)

---
Full diff: https://github.com/llvm/llvm-project/pull/99052.diff


10 Files Affected:

- (modified) lld/MachO/CMakeLists.txt (+1) 
- (modified) lld/MachO/ConcatOutputSection.cpp (+12-2) 
- (modified) lld/MachO/InputSection.cpp (+3-14) 
- (modified) lld/MachO/OutputSegment.cpp (+11-2) 
- (modified) lld/MachO/OutputSegment.h (+1) 
- (added) lld/MachO/Sections.cpp (+36) 
- (added) lld/MachO/Sections.h (+18) 
- (modified) lld/test/MachO/arm64-thunks.s (+20-1) 
- (modified) lld/test/MachO/section-order.s (+5-4) 
- (modified) llvm/utils/gn/secondary/lld/MachO/BUILD.gn (+1) 


``````````diff
diff --git a/lld/MachO/CMakeLists.txt b/lld/MachO/CMakeLists.txt
index 0b92488b00bea..3c8effddbbc9e 100644
--- a/lld/MachO/CMakeLists.txt
+++ b/lld/MachO/CMakeLists.txt
@@ -26,6 +26,7 @@ add_lld_library(lldMachO
   OutputSegment.cpp
   Relocations.cpp
   SectionPriorities.cpp
+  Sections.cpp
   SymbolTable.cpp
   Symbols.cpp
   SyntheticSections.cpp
diff --git a/lld/MachO/ConcatOutputSection.cpp b/lld/MachO/ConcatOutputSection.cpp
index 279423720be9d..cd87f9cc4c3ca 100644
--- a/lld/MachO/ConcatOutputSection.cpp
+++ b/lld/MachO/ConcatOutputSection.cpp
@@ -127,10 +127,20 @@ bool TextOutputSection::needsThunks() const {
   uint64_t isecAddr = addr;
   for (ConcatInputSection *isec : inputs)
     isecAddr = alignToPowerOf2(isecAddr, isec->align) + isec->getSize();
-  if (isecAddr - addr + in.stubs->getSize() <=
-      std::min(target->backwardBranchRange, target->forwardBranchRange))
+  // Other sections besides __text might be small enough to pass this
+  // test but nevertheless need thunks for calling into oher sections.
+  // An imperfect heuristic to use in this case is that if a section
+  // we've already processed in this segment needs thunks, so do the
+  // rest.
+  bool needsThunks = parent && parent->needsThunks;
+  if (!needsThunks &&
+      isecAddr - addr + in.stubs->getSize() <=
+          std::min(target->backwardBranchRange, target->forwardBranchRange))
     return false;
   // Yes, this program is large enough to need thunks.
+  if (parent) {
+    parent->needsThunks = true;
+  }
   for (ConcatInputSection *isec : inputs) {
     for (Reloc &r : isec->relocs) {
       if (!target->hasAttr(r.type, RelocAttrBits::BRANCH))
diff --git a/lld/MachO/InputSection.cpp b/lld/MachO/InputSection.cpp
index 904701731684b..a9b93e07a6013 100644
--- a/lld/MachO/InputSection.cpp
+++ b/lld/MachO/InputSection.cpp
@@ -11,6 +11,7 @@
 #include "Config.h"
 #include "InputFiles.h"
 #include "OutputSegment.h"
+#include "Sections.h"
 #include "Symbols.h"
 #include "SyntheticSections.h"
 #include "Target.h"
@@ -366,20 +367,8 @@ uint64_t WordLiteralInputSection::getOffset(uint64_t off) const {
 }
 
 bool macho::isCodeSection(const InputSection *isec) {
-  uint32_t type = sectionType(isec->getFlags());
-  if (type != S_REGULAR && type != S_COALESCED)
-    return false;
-
-  uint32_t attr = isec->getFlags() & SECTION_ATTRIBUTES_USR;
-  if (attr == S_ATTR_PURE_INSTRUCTIONS)
-    return true;
-
-  if (isec->getSegName() == segment_names::text)
-    return StringSwitch<bool>(isec->getName())
-        .Cases(section_names::textCoalNt, section_names::staticInit, true)
-        .Default(false);
-
-  return false;
+  return sections::isCodeSection(isec->getName(), isec->getSegName(),
+                                 isec->getFlags());
 }
 
 bool macho::isCfStringSection(const InputSection *isec) {
diff --git a/lld/MachO/OutputSegment.cpp b/lld/MachO/OutputSegment.cpp
index a887bc4d515de..c6b24c2270b9a 100644
--- a/lld/MachO/OutputSegment.cpp
+++ b/lld/MachO/OutputSegment.cpp
@@ -9,6 +9,7 @@
 #include "OutputSegment.h"
 #include "ConcatOutputSection.h"
 #include "InputSection.h"
+#include "Sections.h"
 #include "Symbols.h"
 #include "SyntheticSections.h"
 
@@ -89,9 +90,17 @@ static int sectionOrder(OutputSection *osec) {
   StringRef segname = osec->parent->name;
   // Sections are uniquely identified by their segment + section name.
   if (segname == segment_names::text) {
+    if (osec->name == section_names::header)
+      return -7;
+    if (osec->name == section_names::text)
+      return -6;
+    // Ensure all code sections are contiguous with `__text` for thunk
+    // calculations.
+    if (sections::isCodeSection(osec->name, segment_names::text, osec->flags) &&
+        osec->name != section_names::stubHelper) {
+      return -5;
+    }
     return StringSwitch<int>(osec->name)
-        .Case(section_names::header, -6)
-        .Case(section_names::text, -5)
         .Case(section_names::stubs, -4)
         .Case(section_names::stubHelper, -3)
         .Case(section_names::objcStubs, -2)
diff --git a/lld/MachO/OutputSegment.h b/lld/MachO/OutputSegment.h
index 7a0c4a2065a15..5f39734ffdbd7 100644
--- a/lld/MachO/OutputSegment.h
+++ b/lld/MachO/OutputSegment.h
@@ -57,6 +57,7 @@ class OutputSegment {
   uint32_t initProt = 0;
   uint32_t flags = 0;
   uint8_t index;
+  bool needsThunks = false;
 
   llvm::TinyPtrVector<Defined *> segmentStartSymbols;
   llvm::TinyPtrVector<Defined *> segmentEndSymbols;
diff --git a/lld/MachO/Sections.cpp b/lld/MachO/Sections.cpp
new file mode 100644
index 0000000000000..919cb452e7054
--- /dev/null
+++ b/lld/MachO/Sections.cpp
@@ -0,0 +1,36 @@
+//===- Sections.cpp ---------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "Sections.h"
+#include "InputSection.h"
+#include "OutputSegment.h"
+
+#include "llvm/ADT/StringSwitch.h"
+
+using namespace llvm;
+using namespace llvm::MachO;
+
+namespace lld::macho::sections {
+bool isCodeSection(StringRef name, StringRef segName, uint32_t flags) {
+  uint32_t type = sectionType(flags);
+  if (type != S_REGULAR && type != S_COALESCED)
+    return false;
+
+  uint32_t attr = flags & SECTION_ATTRIBUTES_USR;
+  if (attr == S_ATTR_PURE_INSTRUCTIONS)
+    return true;
+
+  if (segName == segment_names::text)
+    return StringSwitch<bool>(name)
+        .Cases(section_names::textCoalNt, section_names::staticInit, true)
+        .Default(false);
+
+  return false;
+}
+
+} // namespace lld::macho::sections
\ No newline at end of file
diff --git a/lld/MachO/Sections.h b/lld/MachO/Sections.h
new file mode 100644
index 0000000000000..e9131cfef0bda
--- /dev/null
+++ b/lld/MachO/Sections.h
@@ -0,0 +1,18 @@
+//===- Sections.h ------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLD_MACHO_SECTIONS_H
+#define LLD_MACHO_SECTIONS_H
+
+#include "lld/Common/LLVM.h"
+
+namespace lld::macho::sections {
+bool isCodeSection(StringRef name, StringRef segName, uint32_t flags);
+} // namespace lld::macho::sections
+
+#endif // #ifndef LLD_MACHO_SECTIONS_H
diff --git a/lld/test/MachO/arm64-thunks.s b/lld/test/MachO/arm64-thunks.s
index 0e4157ece72de..d887359bbc23e 100644
--- a/lld/test/MachO/arm64-thunks.s
+++ b/lld/test/MachO/arm64-thunks.s
@@ -7,12 +7,13 @@
 ## (3) a second thunk is created when the first one goes out of range
 ## (4) early calls to a dylib stub use a thunk, and later calls the stub
 ##     directly
+## (5) Thunks are created for all sections in the text segment with branches.
 ## Notes:
 ## 0x4000000 = 64 Mi = half the magnitude of the forward-branch range
 
 # RUN: rm -rf %t; mkdir %t
 # RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %s -o %t/input.o
-# RUN: %lld -arch arm64 -dead_strip -lSystem -o %t/thunk %t/input.o
+# RUN: %lld -arch arm64 -dead_strip -lSystem -U _extern_sym -o %t/thunk %t/input.o
 # RUN: llvm-objdump --no-print-imm-hex -d --no-show-raw-insn %t/thunk | FileCheck %s
 
 # CHECK: Disassembly of section __TEXT,__text:
@@ -164,6 +165,10 @@
 # CHECK:  adrp x16, 0x[[#%x, F_PAGE]]
 # CHECK:  add  x16, x16, #[[#F_OFFSET]]
 
+# CHECK: Disassembly of section __TEXT,__lcxx_override:
+# CHECK: <_z>:
+# CHECK:  bl 0x[[#%x, A_THUNK_0]] <_a.thunk.0>
+
 # CHECK: Disassembly of section __TEXT,__stubs:
 
 # CHECK: [[#%x, NAN_PAGE + NAN_OFFSET]] <__stubs>:
@@ -300,3 +305,17 @@ _main:
   bl _h
   bl ___nan
   ret
+
+.section __TEXT,__cstring
+  .space 0x4000000
+
+.section __TEXT,__lcxx_override,regular,pure_instructions
+
+.globl _z
+.no_dead_strip _z
+.p2align 2
+_z:
+  bl _a
+  ## Ensure calling into stubs works
+  bl _extern_sym
+  ret
diff --git a/lld/test/MachO/section-order.s b/lld/test/MachO/section-order.s
index f575eebf96dba..19d241dc8c158 100644
--- a/lld/test/MachO/section-order.s
+++ b/lld/test/MachO/section-order.s
@@ -1,4 +1,3 @@
-# REQUIRES: x86
 ## Check that section ordering follows from input file ordering.
 # RUN: rm -rf %t; split-file %s %t
 # RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/1.s -o %t/1.o
@@ -23,18 +22,20 @@
 # CHECK-12-NEXT: __cstring
 
 # CHECK-21:      __text
+## `foo` always sorts next to `__text` since it's a code section
+## and needs to be adjacent for arm64 thunk calculations
+# CHECK-21-NEXT: foo
 # CHECK-21-NEXT: __cstring
 # CHECK-21-NEXT: bar
-# CHECK-21-NEXT: foo
 
 # CHECK-SYNTHETIC-ORDER:      __text
+# CHECK-SYNTHETIC-ORDER-NEXT: foo
 # CHECK-SYNTHETIC-ORDER-NEXT: __stubs
 # CHECK-SYNTHETIC-ORDER-NEXT: __stub_helper
 # CHECK-SYNTHETIC-ORDER-NEXT: __objc_stubs
 # CHECK-SYNTHETIC-ORDER-NEXT: __init_offsets
 # CHECK-SYNTHETIC-ORDER-NEXT: __cstring
 # CHECK-SYNTHETIC-ORDER-NEXT: bar
-# CHECK-SYNTHETIC-ORDER-NEXT: foo
 # CHECK-SYNTHETIC-ORDER-NEXT: __unwind_info
 # CHECK-SYNTHETIC-ORDER-NEXT: __eh_frame
 # CHECK-SYNTHETIC-ORDER-NEXT: __objc_selrefs
@@ -52,5 +53,5 @@
   .asciz ""
 .section __TEXT,bar
   .space 1
-.section __TEXT,foo
+.section __TEXT,foo,regular,pure_instructions
   .space 1
diff --git a/llvm/utils/gn/secondary/lld/MachO/BUILD.gn b/llvm/utils/gn/secondary/lld/MachO/BUILD.gn
index 66f762e0a7c26..2579d9ba8e43e 100644
--- a/llvm/utils/gn/secondary/lld/MachO/BUILD.gn
+++ b/llvm/utils/gn/secondary/lld/MachO/BUILD.gn
@@ -44,6 +44,7 @@ static_library("MachO") {
     "OutputSegment.cpp",
     "Relocations.cpp",
     "SectionPriorities.cpp",
+    "Sections.cpp",
     "SymbolTable.cpp",
     "Symbols.cpp",
     "SyntheticSections.cpp",

``````````

</details>


https://github.com/llvm/llvm-project/pull/99052


More information about the llvm-commits mailing list