[lld] [llvm] [lld-macho] Fix thunks for non-__text TEXT sections (PR #99052)
Leonard Grey via llvm-commits
llvm-commits at lists.llvm.org
Mon Jul 22 13:06:08 PDT 2024
https://github.com/speednoisemovement updated https://github.com/llvm/llvm-project/pull/99052
>From 21dbb09558a08a482dcd6729f0fc2639f7b5beaf Mon Sep 17 00:00:00 2001
From: Leonard Grey <lgrey at chromium.org>
Date: Thu, 11 Jul 2024 14:03:16 -0400
Subject: [PATCH 1/3] [lld-macho] Ensure all code sections in __text generate
thunks if necessary
---
lld/MachO/CMakeLists.txt | 1 +
lld/MachO/ConcatOutputSection.cpp | 14 +++++++--
lld/MachO/InputSection.cpp | 17 ++--------
lld/MachO/OutputSegment.cpp | 13 ++++++--
lld/MachO/OutputSegment.h | 1 +
lld/MachO/Sections.cpp | 36 ++++++++++++++++++++++
lld/MachO/Sections.h | 18 +++++++++++
lld/test/MachO/arm64-thunks.s | 21 ++++++++++++-
lld/test/MachO/section-order.s | 9 +++---
llvm/utils/gn/secondary/lld/MachO/BUILD.gn | 1 +
10 files changed, 108 insertions(+), 23 deletions(-)
create mode 100644 lld/MachO/Sections.cpp
create mode 100644 lld/MachO/Sections.h
diff --git a/lld/MachO/CMakeLists.txt b/lld/MachO/CMakeLists.txt
index 0b92488b00bea..3c8effddbbc9e 100644
--- a/lld/MachO/CMakeLists.txt
+++ b/lld/MachO/CMakeLists.txt
@@ -26,6 +26,7 @@ add_lld_library(lldMachO
OutputSegment.cpp
Relocations.cpp
SectionPriorities.cpp
+ Sections.cpp
SymbolTable.cpp
Symbols.cpp
SyntheticSections.cpp
diff --git a/lld/MachO/ConcatOutputSection.cpp b/lld/MachO/ConcatOutputSection.cpp
index 279423720be9d..cd87f9cc4c3ca 100644
--- a/lld/MachO/ConcatOutputSection.cpp
+++ b/lld/MachO/ConcatOutputSection.cpp
@@ -127,10 +127,20 @@ bool TextOutputSection::needsThunks() const {
uint64_t isecAddr = addr;
for (ConcatInputSection *isec : inputs)
isecAddr = alignToPowerOf2(isecAddr, isec->align) + isec->getSize();
- if (isecAddr - addr + in.stubs->getSize() <=
- std::min(target->backwardBranchRange, target->forwardBranchRange))
+ // Other sections besides __text might be small enough to pass this
+ // test but nevertheless need thunks for calling into oher sections.
+ // An imperfect heuristic to use in this case is that if a section
+ // we've already processed in this segment needs thunks, so do the
+ // rest.
+ bool needsThunks = parent && parent->needsThunks;
+ if (!needsThunks &&
+ isecAddr - addr + in.stubs->getSize() <=
+ std::min(target->backwardBranchRange, target->forwardBranchRange))
return false;
// Yes, this program is large enough to need thunks.
+ if (parent) {
+ parent->needsThunks = true;
+ }
for (ConcatInputSection *isec : inputs) {
for (Reloc &r : isec->relocs) {
if (!target->hasAttr(r.type, RelocAttrBits::BRANCH))
diff --git a/lld/MachO/InputSection.cpp b/lld/MachO/InputSection.cpp
index 904701731684b..a9b93e07a6013 100644
--- a/lld/MachO/InputSection.cpp
+++ b/lld/MachO/InputSection.cpp
@@ -11,6 +11,7 @@
#include "Config.h"
#include "InputFiles.h"
#include "OutputSegment.h"
+#include "Sections.h"
#include "Symbols.h"
#include "SyntheticSections.h"
#include "Target.h"
@@ -366,20 +367,8 @@ uint64_t WordLiteralInputSection::getOffset(uint64_t off) const {
}
bool macho::isCodeSection(const InputSection *isec) {
- uint32_t type = sectionType(isec->getFlags());
- if (type != S_REGULAR && type != S_COALESCED)
- return false;
-
- uint32_t attr = isec->getFlags() & SECTION_ATTRIBUTES_USR;
- if (attr == S_ATTR_PURE_INSTRUCTIONS)
- return true;
-
- if (isec->getSegName() == segment_names::text)
- return StringSwitch<bool>(isec->getName())
- .Cases(section_names::textCoalNt, section_names::staticInit, true)
- .Default(false);
-
- return false;
+ return sections::isCodeSection(isec->getName(), isec->getSegName(),
+ isec->getFlags());
}
bool macho::isCfStringSection(const InputSection *isec) {
diff --git a/lld/MachO/OutputSegment.cpp b/lld/MachO/OutputSegment.cpp
index a887bc4d515de..c6b24c2270b9a 100644
--- a/lld/MachO/OutputSegment.cpp
+++ b/lld/MachO/OutputSegment.cpp
@@ -9,6 +9,7 @@
#include "OutputSegment.h"
#include "ConcatOutputSection.h"
#include "InputSection.h"
+#include "Sections.h"
#include "Symbols.h"
#include "SyntheticSections.h"
@@ -89,9 +90,17 @@ static int sectionOrder(OutputSection *osec) {
StringRef segname = osec->parent->name;
// Sections are uniquely identified by their segment + section name.
if (segname == segment_names::text) {
+ if (osec->name == section_names::header)
+ return -7;
+ if (osec->name == section_names::text)
+ return -6;
+ // Ensure all code sections are contiguous with `__text` for thunk
+ // calculations.
+ if (sections::isCodeSection(osec->name, segment_names::text, osec->flags) &&
+ osec->name != section_names::stubHelper) {
+ return -5;
+ }
return StringSwitch<int>(osec->name)
- .Case(section_names::header, -6)
- .Case(section_names::text, -5)
.Case(section_names::stubs, -4)
.Case(section_names::stubHelper, -3)
.Case(section_names::objcStubs, -2)
diff --git a/lld/MachO/OutputSegment.h b/lld/MachO/OutputSegment.h
index 7a0c4a2065a15..5f39734ffdbd7 100644
--- a/lld/MachO/OutputSegment.h
+++ b/lld/MachO/OutputSegment.h
@@ -57,6 +57,7 @@ class OutputSegment {
uint32_t initProt = 0;
uint32_t flags = 0;
uint8_t index;
+ bool needsThunks = false;
llvm::TinyPtrVector<Defined *> segmentStartSymbols;
llvm::TinyPtrVector<Defined *> segmentEndSymbols;
diff --git a/lld/MachO/Sections.cpp b/lld/MachO/Sections.cpp
new file mode 100644
index 0000000000000..919cb452e7054
--- /dev/null
+++ b/lld/MachO/Sections.cpp
@@ -0,0 +1,36 @@
+//===- Sections.cpp ---------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "Sections.h"
+#include "InputSection.h"
+#include "OutputSegment.h"
+
+#include "llvm/ADT/StringSwitch.h"
+
+using namespace llvm;
+using namespace llvm::MachO;
+
+namespace lld::macho::sections {
+bool isCodeSection(StringRef name, StringRef segName, uint32_t flags) {
+ uint32_t type = sectionType(flags);
+ if (type != S_REGULAR && type != S_COALESCED)
+ return false;
+
+ uint32_t attr = flags & SECTION_ATTRIBUTES_USR;
+ if (attr == S_ATTR_PURE_INSTRUCTIONS)
+ return true;
+
+ if (segName == segment_names::text)
+ return StringSwitch<bool>(name)
+ .Cases(section_names::textCoalNt, section_names::staticInit, true)
+ .Default(false);
+
+ return false;
+}
+
+} // namespace lld::macho::sections
\ No newline at end of file
diff --git a/lld/MachO/Sections.h b/lld/MachO/Sections.h
new file mode 100644
index 0000000000000..e9131cfef0bda
--- /dev/null
+++ b/lld/MachO/Sections.h
@@ -0,0 +1,18 @@
+//===- Sections.h ------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLD_MACHO_SECTIONS_H
+#define LLD_MACHO_SECTIONS_H
+
+#include "lld/Common/LLVM.h"
+
+namespace lld::macho::sections {
+bool isCodeSection(StringRef name, StringRef segName, uint32_t flags);
+} // namespace lld::macho::sections
+
+#endif // #ifndef LLD_MACHO_SECTIONS_H
diff --git a/lld/test/MachO/arm64-thunks.s b/lld/test/MachO/arm64-thunks.s
index 0e4157ece72de..d887359bbc23e 100644
--- a/lld/test/MachO/arm64-thunks.s
+++ b/lld/test/MachO/arm64-thunks.s
@@ -7,12 +7,13 @@
## (3) a second thunk is created when the first one goes out of range
## (4) early calls to a dylib stub use a thunk, and later calls the stub
## directly
+## (5) Thunks are created for all sections in the text segment with branches.
## Notes:
## 0x4000000 = 64 Mi = half the magnitude of the forward-branch range
# RUN: rm -rf %t; mkdir %t
# RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %s -o %t/input.o
-# RUN: %lld -arch arm64 -dead_strip -lSystem -o %t/thunk %t/input.o
+# RUN: %lld -arch arm64 -dead_strip -lSystem -U _extern_sym -o %t/thunk %t/input.o
# RUN: llvm-objdump --no-print-imm-hex -d --no-show-raw-insn %t/thunk | FileCheck %s
# CHECK: Disassembly of section __TEXT,__text:
@@ -164,6 +165,10 @@
# CHECK: adrp x16, 0x[[#%x, F_PAGE]]
# CHECK: add x16, x16, #[[#F_OFFSET]]
+# CHECK: Disassembly of section __TEXT,__lcxx_override:
+# CHECK: <_z>:
+# CHECK: bl 0x[[#%x, A_THUNK_0]] <_a.thunk.0>
+
# CHECK: Disassembly of section __TEXT,__stubs:
# CHECK: [[#%x, NAN_PAGE + NAN_OFFSET]] <__stubs>:
@@ -300,3 +305,17 @@ _main:
bl _h
bl ___nan
ret
+
+.section __TEXT,__cstring
+ .space 0x4000000
+
+.section __TEXT,__lcxx_override,regular,pure_instructions
+
+.globl _z
+.no_dead_strip _z
+.p2align 2
+_z:
+ bl _a
+ ## Ensure calling into stubs works
+ bl _extern_sym
+ ret
diff --git a/lld/test/MachO/section-order.s b/lld/test/MachO/section-order.s
index f575eebf96dba..19d241dc8c158 100644
--- a/lld/test/MachO/section-order.s
+++ b/lld/test/MachO/section-order.s
@@ -1,4 +1,3 @@
-# REQUIRES: x86
## Check that section ordering follows from input file ordering.
# RUN: rm -rf %t; split-file %s %t
# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/1.s -o %t/1.o
@@ -23,18 +22,20 @@
# CHECK-12-NEXT: __cstring
# CHECK-21: __text
+## `foo` always sorts next to `__text` since it's a code section
+## and needs to be adjacent for arm64 thunk calculations
+# CHECK-21-NEXT: foo
# CHECK-21-NEXT: __cstring
# CHECK-21-NEXT: bar
-# CHECK-21-NEXT: foo
# CHECK-SYNTHETIC-ORDER: __text
+# CHECK-SYNTHETIC-ORDER-NEXT: foo
# CHECK-SYNTHETIC-ORDER-NEXT: __stubs
# CHECK-SYNTHETIC-ORDER-NEXT: __stub_helper
# CHECK-SYNTHETIC-ORDER-NEXT: __objc_stubs
# CHECK-SYNTHETIC-ORDER-NEXT: __init_offsets
# CHECK-SYNTHETIC-ORDER-NEXT: __cstring
# CHECK-SYNTHETIC-ORDER-NEXT: bar
-# CHECK-SYNTHETIC-ORDER-NEXT: foo
# CHECK-SYNTHETIC-ORDER-NEXT: __unwind_info
# CHECK-SYNTHETIC-ORDER-NEXT: __eh_frame
# CHECK-SYNTHETIC-ORDER-NEXT: __objc_selrefs
@@ -52,5 +53,5 @@
.asciz ""
.section __TEXT,bar
.space 1
-.section __TEXT,foo
+.section __TEXT,foo,regular,pure_instructions
.space 1
diff --git a/llvm/utils/gn/secondary/lld/MachO/BUILD.gn b/llvm/utils/gn/secondary/lld/MachO/BUILD.gn
index 66f762e0a7c26..2579d9ba8e43e 100644
--- a/llvm/utils/gn/secondary/lld/MachO/BUILD.gn
+++ b/llvm/utils/gn/secondary/lld/MachO/BUILD.gn
@@ -44,6 +44,7 @@ static_library("MachO") {
"OutputSegment.cpp",
"Relocations.cpp",
"SectionPriorities.cpp",
+ "Sections.cpp",
"SymbolTable.cpp",
"Symbols.cpp",
"SyntheticSections.cpp",
>From eb3418808c7073c5d49798b89931dc51662d9413 Mon Sep 17 00:00:00 2001
From: Leonard Grey <lgrey at chromium.org>
Date: Mon, 22 Jul 2024 11:28:19 -0400
Subject: [PATCH 2/3] Comments
---
lld/MachO/Sections.cpp | 2 +-
lld/MachO/Sections.h | 5 +++--
2 files changed, 4 insertions(+), 3 deletions(-)
diff --git a/lld/MachO/Sections.cpp b/lld/MachO/Sections.cpp
index 919cb452e7054..a27d902c0a227 100644
--- a/lld/MachO/Sections.cpp
+++ b/lld/MachO/Sections.cpp
@@ -33,4 +33,4 @@ bool isCodeSection(StringRef name, StringRef segName, uint32_t flags) {
return false;
}
-} // namespace lld::macho::sections
\ No newline at end of file
+} // namespace lld::macho::sections
diff --git a/lld/MachO/Sections.h b/lld/MachO/Sections.h
index e9131cfef0bda..2246333d85729 100644
--- a/lld/MachO/Sections.h
+++ b/lld/MachO/Sections.h
@@ -9,10 +9,11 @@
#ifndef LLD_MACHO_SECTIONS_H
#define LLD_MACHO_SECTIONS_H
-#include "lld/Common/LLVM.h"
+#include "llvm/ADT/StringRef.h"
namespace lld::macho::sections {
-bool isCodeSection(StringRef name, StringRef segName, uint32_t flags);
+bool isCodeSection(llvm::StringRef name, llvm::StringRef segName,
+ uint32_t flags);
} // namespace lld::macho::sections
#endif // #ifndef LLD_MACHO_SECTIONS_H
>From 824e432b3cd02a9868e9ba36101e655a8be4e75e Mon Sep 17 00:00:00 2001
From: Leonard Grey <lgrey at chromium.org>
Date: Mon, 22 Jul 2024 16:05:45 -0400
Subject: [PATCH 3/3] Add comment, revert test req
---
lld/MachO/ConcatOutputSection.cpp | 2 +-
lld/MachO/OutputSegment.cpp | 3 +++
lld/test/MachO/section-order.s | 1 +
3 files changed, 5 insertions(+), 1 deletion(-)
diff --git a/lld/MachO/ConcatOutputSection.cpp b/lld/MachO/ConcatOutputSection.cpp
index cd87f9cc4c3ca..832e0f3f0caf1 100644
--- a/lld/MachO/ConcatOutputSection.cpp
+++ b/lld/MachO/ConcatOutputSection.cpp
@@ -128,7 +128,7 @@ bool TextOutputSection::needsThunks() const {
for (ConcatInputSection *isec : inputs)
isecAddr = alignToPowerOf2(isecAddr, isec->align) + isec->getSize();
// Other sections besides __text might be small enough to pass this
- // test but nevertheless need thunks for calling into oher sections.
+ // test but nevertheless need thunks for calling into ohter sections.
// An imperfect heuristic to use in this case is that if a section
// we've already processed in this segment needs thunks, so do the
// rest.
diff --git a/lld/MachO/OutputSegment.cpp b/lld/MachO/OutputSegment.cpp
index c6b24c2270b9a..3d8a8eb61a9bb 100644
--- a/lld/MachO/OutputSegment.cpp
+++ b/lld/MachO/OutputSegment.cpp
@@ -92,6 +92,9 @@ static int sectionOrder(OutputSection *osec) {
if (segname == segment_names::text) {
if (osec->name == section_names::header)
return -7;
+ // `__text` needs to precede the other code sections since its
+ // expected to be the largest. This means in effect that it will
+ // be the section that determines whether we need thunks or not.
if (osec->name == section_names::text)
return -6;
// Ensure all code sections are contiguous with `__text` for thunk
diff --git a/lld/test/MachO/section-order.s b/lld/test/MachO/section-order.s
index 19d241dc8c158..7a0b6f799630e 100644
--- a/lld/test/MachO/section-order.s
+++ b/lld/test/MachO/section-order.s
@@ -1,3 +1,4 @@
+# REQUIRES: x86
## Check that section ordering follows from input file ordering.
# RUN: rm -rf %t; split-file %s %t
# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/1.s -o %t/1.o
More information about the llvm-commits
mailing list