[lld] [llvm] [lld-macho] Fix thunks for non-__text TEXT sections (PR #99052)

Leonard Grey via llvm-commits llvm-commits at lists.llvm.org
Mon Jul 22 13:06:08 PDT 2024


https://github.com/speednoisemovement updated https://github.com/llvm/llvm-project/pull/99052

>From 21dbb09558a08a482dcd6729f0fc2639f7b5beaf Mon Sep 17 00:00:00 2001
From: Leonard Grey <lgrey at chromium.org>
Date: Thu, 11 Jul 2024 14:03:16 -0400
Subject: [PATCH 1/3] [lld-macho] Ensure all code sections in __text generate
 thunks if necessary

---
 lld/MachO/CMakeLists.txt                   |  1 +
 lld/MachO/ConcatOutputSection.cpp          | 14 +++++++--
 lld/MachO/InputSection.cpp                 | 17 ++--------
 lld/MachO/OutputSegment.cpp                | 13 ++++++--
 lld/MachO/OutputSegment.h                  |  1 +
 lld/MachO/Sections.cpp                     | 36 ++++++++++++++++++++++
 lld/MachO/Sections.h                       | 18 +++++++++++
 lld/test/MachO/arm64-thunks.s              | 21 ++++++++++++-
 lld/test/MachO/section-order.s             |  9 +++---
 llvm/utils/gn/secondary/lld/MachO/BUILD.gn |  1 +
 10 files changed, 108 insertions(+), 23 deletions(-)
 create mode 100644 lld/MachO/Sections.cpp
 create mode 100644 lld/MachO/Sections.h

diff --git a/lld/MachO/CMakeLists.txt b/lld/MachO/CMakeLists.txt
index 0b92488b00bea..3c8effddbbc9e 100644
--- a/lld/MachO/CMakeLists.txt
+++ b/lld/MachO/CMakeLists.txt
@@ -26,6 +26,7 @@ add_lld_library(lldMachO
   OutputSegment.cpp
   Relocations.cpp
   SectionPriorities.cpp
+  Sections.cpp
   SymbolTable.cpp
   Symbols.cpp
   SyntheticSections.cpp
diff --git a/lld/MachO/ConcatOutputSection.cpp b/lld/MachO/ConcatOutputSection.cpp
index 279423720be9d..cd87f9cc4c3ca 100644
--- a/lld/MachO/ConcatOutputSection.cpp
+++ b/lld/MachO/ConcatOutputSection.cpp
@@ -127,10 +127,20 @@ bool TextOutputSection::needsThunks() const {
   uint64_t isecAddr = addr;
   for (ConcatInputSection *isec : inputs)
     isecAddr = alignToPowerOf2(isecAddr, isec->align) + isec->getSize();
-  if (isecAddr - addr + in.stubs->getSize() <=
-      std::min(target->backwardBranchRange, target->forwardBranchRange))
+  // Other sections besides __text might be small enough to pass this
+  // test but nevertheless need thunks for calling into oher sections.
+  // An imperfect heuristic to use in this case is that if a section
+  // we've already processed in this segment needs thunks, so do the
+  // rest.
+  bool needsThunks = parent && parent->needsThunks;
+  if (!needsThunks &&
+      isecAddr - addr + in.stubs->getSize() <=
+          std::min(target->backwardBranchRange, target->forwardBranchRange))
     return false;
   // Yes, this program is large enough to need thunks.
+  if (parent) {
+    parent->needsThunks = true;
+  }
   for (ConcatInputSection *isec : inputs) {
     for (Reloc &r : isec->relocs) {
       if (!target->hasAttr(r.type, RelocAttrBits::BRANCH))
diff --git a/lld/MachO/InputSection.cpp b/lld/MachO/InputSection.cpp
index 904701731684b..a9b93e07a6013 100644
--- a/lld/MachO/InputSection.cpp
+++ b/lld/MachO/InputSection.cpp
@@ -11,6 +11,7 @@
 #include "Config.h"
 #include "InputFiles.h"
 #include "OutputSegment.h"
+#include "Sections.h"
 #include "Symbols.h"
 #include "SyntheticSections.h"
 #include "Target.h"
@@ -366,20 +367,8 @@ uint64_t WordLiteralInputSection::getOffset(uint64_t off) const {
 }
 
 bool macho::isCodeSection(const InputSection *isec) {
-  uint32_t type = sectionType(isec->getFlags());
-  if (type != S_REGULAR && type != S_COALESCED)
-    return false;
-
-  uint32_t attr = isec->getFlags() & SECTION_ATTRIBUTES_USR;
-  if (attr == S_ATTR_PURE_INSTRUCTIONS)
-    return true;
-
-  if (isec->getSegName() == segment_names::text)
-    return StringSwitch<bool>(isec->getName())
-        .Cases(section_names::textCoalNt, section_names::staticInit, true)
-        .Default(false);
-
-  return false;
+  return sections::isCodeSection(isec->getName(), isec->getSegName(),
+                                 isec->getFlags());
 }
 
 bool macho::isCfStringSection(const InputSection *isec) {
diff --git a/lld/MachO/OutputSegment.cpp b/lld/MachO/OutputSegment.cpp
index a887bc4d515de..c6b24c2270b9a 100644
--- a/lld/MachO/OutputSegment.cpp
+++ b/lld/MachO/OutputSegment.cpp
@@ -9,6 +9,7 @@
 #include "OutputSegment.h"
 #include "ConcatOutputSection.h"
 #include "InputSection.h"
+#include "Sections.h"
 #include "Symbols.h"
 #include "SyntheticSections.h"
 
@@ -89,9 +90,17 @@ static int sectionOrder(OutputSection *osec) {
   StringRef segname = osec->parent->name;
   // Sections are uniquely identified by their segment + section name.
   if (segname == segment_names::text) {
+    if (osec->name == section_names::header)
+      return -7;
+    if (osec->name == section_names::text)
+      return -6;
+    // Ensure all code sections are contiguous with `__text` for thunk
+    // calculations.
+    if (sections::isCodeSection(osec->name, segment_names::text, osec->flags) &&
+        osec->name != section_names::stubHelper) {
+      return -5;
+    }
     return StringSwitch<int>(osec->name)
-        .Case(section_names::header, -6)
-        .Case(section_names::text, -5)
         .Case(section_names::stubs, -4)
         .Case(section_names::stubHelper, -3)
         .Case(section_names::objcStubs, -2)
diff --git a/lld/MachO/OutputSegment.h b/lld/MachO/OutputSegment.h
index 7a0c4a2065a15..5f39734ffdbd7 100644
--- a/lld/MachO/OutputSegment.h
+++ b/lld/MachO/OutputSegment.h
@@ -57,6 +57,7 @@ class OutputSegment {
   uint32_t initProt = 0;
   uint32_t flags = 0;
   uint8_t index;
+  bool needsThunks = false;
 
   llvm::TinyPtrVector<Defined *> segmentStartSymbols;
   llvm::TinyPtrVector<Defined *> segmentEndSymbols;
diff --git a/lld/MachO/Sections.cpp b/lld/MachO/Sections.cpp
new file mode 100644
index 0000000000000..919cb452e7054
--- /dev/null
+++ b/lld/MachO/Sections.cpp
@@ -0,0 +1,36 @@
+//===- Sections.cpp ---------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "Sections.h"
+#include "InputSection.h"
+#include "OutputSegment.h"
+
+#include "llvm/ADT/StringSwitch.h"
+
+using namespace llvm;
+using namespace llvm::MachO;
+
+namespace lld::macho::sections {
+bool isCodeSection(StringRef name, StringRef segName, uint32_t flags) {
+  uint32_t type = sectionType(flags);
+  if (type != S_REGULAR && type != S_COALESCED)
+    return false;
+
+  uint32_t attr = flags & SECTION_ATTRIBUTES_USR;
+  if (attr == S_ATTR_PURE_INSTRUCTIONS)
+    return true;
+
+  if (segName == segment_names::text)
+    return StringSwitch<bool>(name)
+        .Cases(section_names::textCoalNt, section_names::staticInit, true)
+        .Default(false);
+
+  return false;
+}
+
+} // namespace lld::macho::sections
\ No newline at end of file
diff --git a/lld/MachO/Sections.h b/lld/MachO/Sections.h
new file mode 100644
index 0000000000000..e9131cfef0bda
--- /dev/null
+++ b/lld/MachO/Sections.h
@@ -0,0 +1,18 @@
+//===- Sections.h ------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLD_MACHO_SECTIONS_H
+#define LLD_MACHO_SECTIONS_H
+
+#include "lld/Common/LLVM.h"
+
+namespace lld::macho::sections {
+bool isCodeSection(StringRef name, StringRef segName, uint32_t flags);
+} // namespace lld::macho::sections
+
+#endif // #ifndef LLD_MACHO_SECTIONS_H
diff --git a/lld/test/MachO/arm64-thunks.s b/lld/test/MachO/arm64-thunks.s
index 0e4157ece72de..d887359bbc23e 100644
--- a/lld/test/MachO/arm64-thunks.s
+++ b/lld/test/MachO/arm64-thunks.s
@@ -7,12 +7,13 @@
 ## (3) a second thunk is created when the first one goes out of range
 ## (4) early calls to a dylib stub use a thunk, and later calls the stub
 ##     directly
+## (5) Thunks are created for all sections in the text segment with branches.
 ## Notes:
 ## 0x4000000 = 64 Mi = half the magnitude of the forward-branch range
 
 # RUN: rm -rf %t; mkdir %t
 # RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %s -o %t/input.o
-# RUN: %lld -arch arm64 -dead_strip -lSystem -o %t/thunk %t/input.o
+# RUN: %lld -arch arm64 -dead_strip -lSystem -U _extern_sym -o %t/thunk %t/input.o
 # RUN: llvm-objdump --no-print-imm-hex -d --no-show-raw-insn %t/thunk | FileCheck %s
 
 # CHECK: Disassembly of section __TEXT,__text:
@@ -164,6 +165,10 @@
 # CHECK:  adrp x16, 0x[[#%x, F_PAGE]]
 # CHECK:  add  x16, x16, #[[#F_OFFSET]]
 
+# CHECK: Disassembly of section __TEXT,__lcxx_override:
+# CHECK: <_z>:
+# CHECK:  bl 0x[[#%x, A_THUNK_0]] <_a.thunk.0>
+
 # CHECK: Disassembly of section __TEXT,__stubs:
 
 # CHECK: [[#%x, NAN_PAGE + NAN_OFFSET]] <__stubs>:
@@ -300,3 +305,17 @@ _main:
   bl _h
   bl ___nan
   ret
+
+.section __TEXT,__cstring
+  .space 0x4000000
+
+.section __TEXT,__lcxx_override,regular,pure_instructions
+
+.globl _z
+.no_dead_strip _z
+.p2align 2
+_z:
+  bl _a
+  ## Ensure calling into stubs works
+  bl _extern_sym
+  ret
diff --git a/lld/test/MachO/section-order.s b/lld/test/MachO/section-order.s
index f575eebf96dba..19d241dc8c158 100644
--- a/lld/test/MachO/section-order.s
+++ b/lld/test/MachO/section-order.s
@@ -1,4 +1,3 @@
-# REQUIRES: x86
 ## Check that section ordering follows from input file ordering.
 # RUN: rm -rf %t; split-file %s %t
 # RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/1.s -o %t/1.o
@@ -23,18 +22,20 @@
 # CHECK-12-NEXT: __cstring
 
 # CHECK-21:      __text
+## `foo` always sorts next to `__text` since it's a code section
+## and needs to be adjacent for arm64 thunk calculations
+# CHECK-21-NEXT: foo
 # CHECK-21-NEXT: __cstring
 # CHECK-21-NEXT: bar
-# CHECK-21-NEXT: foo
 
 # CHECK-SYNTHETIC-ORDER:      __text
+# CHECK-SYNTHETIC-ORDER-NEXT: foo
 # CHECK-SYNTHETIC-ORDER-NEXT: __stubs
 # CHECK-SYNTHETIC-ORDER-NEXT: __stub_helper
 # CHECK-SYNTHETIC-ORDER-NEXT: __objc_stubs
 # CHECK-SYNTHETIC-ORDER-NEXT: __init_offsets
 # CHECK-SYNTHETIC-ORDER-NEXT: __cstring
 # CHECK-SYNTHETIC-ORDER-NEXT: bar
-# CHECK-SYNTHETIC-ORDER-NEXT: foo
 # CHECK-SYNTHETIC-ORDER-NEXT: __unwind_info
 # CHECK-SYNTHETIC-ORDER-NEXT: __eh_frame
 # CHECK-SYNTHETIC-ORDER-NEXT: __objc_selrefs
@@ -52,5 +53,5 @@
   .asciz ""
 .section __TEXT,bar
   .space 1
-.section __TEXT,foo
+.section __TEXT,foo,regular,pure_instructions
   .space 1
diff --git a/llvm/utils/gn/secondary/lld/MachO/BUILD.gn b/llvm/utils/gn/secondary/lld/MachO/BUILD.gn
index 66f762e0a7c26..2579d9ba8e43e 100644
--- a/llvm/utils/gn/secondary/lld/MachO/BUILD.gn
+++ b/llvm/utils/gn/secondary/lld/MachO/BUILD.gn
@@ -44,6 +44,7 @@ static_library("MachO") {
     "OutputSegment.cpp",
     "Relocations.cpp",
     "SectionPriorities.cpp",
+    "Sections.cpp",
     "SymbolTable.cpp",
     "Symbols.cpp",
     "SyntheticSections.cpp",

>From eb3418808c7073c5d49798b89931dc51662d9413 Mon Sep 17 00:00:00 2001
From: Leonard Grey <lgrey at chromium.org>
Date: Mon, 22 Jul 2024 11:28:19 -0400
Subject: [PATCH 2/3] Comments

---
 lld/MachO/Sections.cpp | 2 +-
 lld/MachO/Sections.h   | 5 +++--
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/lld/MachO/Sections.cpp b/lld/MachO/Sections.cpp
index 919cb452e7054..a27d902c0a227 100644
--- a/lld/MachO/Sections.cpp
+++ b/lld/MachO/Sections.cpp
@@ -33,4 +33,4 @@ bool isCodeSection(StringRef name, StringRef segName, uint32_t flags) {
   return false;
 }
 
-} // namespace lld::macho::sections
\ No newline at end of file
+} // namespace lld::macho::sections
diff --git a/lld/MachO/Sections.h b/lld/MachO/Sections.h
index e9131cfef0bda..2246333d85729 100644
--- a/lld/MachO/Sections.h
+++ b/lld/MachO/Sections.h
@@ -9,10 +9,11 @@
 #ifndef LLD_MACHO_SECTIONS_H
 #define LLD_MACHO_SECTIONS_H
 
-#include "lld/Common/LLVM.h"
+#include "llvm/ADT/StringRef.h"
 
 namespace lld::macho::sections {
-bool isCodeSection(StringRef name, StringRef segName, uint32_t flags);
+bool isCodeSection(llvm::StringRef name, llvm::StringRef segName,
+                   uint32_t flags);
 } // namespace lld::macho::sections
 
 #endif // #ifndef LLD_MACHO_SECTIONS_H

>From 824e432b3cd02a9868e9ba36101e655a8be4e75e Mon Sep 17 00:00:00 2001
From: Leonard Grey <lgrey at chromium.org>
Date: Mon, 22 Jul 2024 16:05:45 -0400
Subject: [PATCH 3/3] Add comment, revert test req

---
 lld/MachO/ConcatOutputSection.cpp | 2 +-
 lld/MachO/OutputSegment.cpp       | 3 +++
 lld/test/MachO/section-order.s    | 1 +
 3 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/lld/MachO/ConcatOutputSection.cpp b/lld/MachO/ConcatOutputSection.cpp
index cd87f9cc4c3ca..832e0f3f0caf1 100644
--- a/lld/MachO/ConcatOutputSection.cpp
+++ b/lld/MachO/ConcatOutputSection.cpp
@@ -128,7 +128,7 @@ bool TextOutputSection::needsThunks() const {
   for (ConcatInputSection *isec : inputs)
     isecAddr = alignToPowerOf2(isecAddr, isec->align) + isec->getSize();
   // Other sections besides __text might be small enough to pass this
-  // test but nevertheless need thunks for calling into oher sections.
+  // test but nevertheless need thunks for calling into ohter sections.
   // An imperfect heuristic to use in this case is that if a section
   // we've already processed in this segment needs thunks, so do the
   // rest.
diff --git a/lld/MachO/OutputSegment.cpp b/lld/MachO/OutputSegment.cpp
index c6b24c2270b9a..3d8a8eb61a9bb 100644
--- a/lld/MachO/OutputSegment.cpp
+++ b/lld/MachO/OutputSegment.cpp
@@ -92,6 +92,9 @@ static int sectionOrder(OutputSection *osec) {
   if (segname == segment_names::text) {
     if (osec->name == section_names::header)
       return -7;
+    // `__text` needs to precede the other code sections since its
+    // expected to be the largest. This means in effect that it will
+    // be the section that determines whether we need thunks or not.
     if (osec->name == section_names::text)
       return -6;
     // Ensure all code sections are contiguous with `__text` for thunk
diff --git a/lld/test/MachO/section-order.s b/lld/test/MachO/section-order.s
index 19d241dc8c158..7a0b6f799630e 100644
--- a/lld/test/MachO/section-order.s
+++ b/lld/test/MachO/section-order.s
@@ -1,3 +1,4 @@
+# REQUIRES: x86
 ## Check that section ordering follows from input file ordering.
 # RUN: rm -rf %t; split-file %s %t
 # RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/1.s -o %t/1.o



More information about the llvm-commits mailing list