[llvm] 389e0a8 - [lld-macho] Support synthesizing __TEXT,__init_offsets

Daniel Bertalan via llvm-commits llvm-commits at lists.llvm.org
Wed Aug 31 01:22:07 PDT 2022


Author: Daniel Bertalan
Date: 2022-08-31T10:13:45+02:00
New Revision: 389e0a81a15ca688cf85a82d04aeaa68d18da161

URL: https://github.com/llvm/llvm-project/commit/389e0a81a15ca688cf85a82d04aeaa68d18da161
DIFF: https://github.com/llvm/llvm-project/commit/389e0a81a15ca688cf85a82d04aeaa68d18da161.diff

LOG: [lld-macho] Support synthesizing __TEXT,__init_offsets

This section stores 32-bit `__TEXT` segment offsets of initializer
functions, and is used instead of `__mod_init_func` when chained fixups
are enabled.

Storing the offsets lets us avoid emitting fixups for the initializers.

Differential Revision: https://reviews.llvm.org/D132947

Added: 
    lld/test/MachO/init-offsets.s

Modified: 
    lld/MachO/Config.h
    lld/MachO/Driver.cpp
    lld/MachO/InputSection.h
    lld/MachO/MarkLive.cpp
    lld/MachO/Options.td
    lld/MachO/OutputSegment.cpp
    lld/MachO/Symbols.h
    lld/MachO/SyntheticSections.cpp
    lld/MachO/SyntheticSections.h
    lld/MachO/Writer.cpp
    llvm/include/llvm/BinaryFormat/MachO.h
    llvm/lib/MC/MCSectionMachO.cpp
    llvm/tools/llvm-objdump/MachODump.cpp

Removed: 
    


################################################################################
diff  --git a/lld/MachO/Config.h b/lld/MachO/Config.h
index 8f2d790603c63..c6e8b2582bd7d 100644
--- a/lld/MachO/Config.h
+++ b/lld/MachO/Config.h
@@ -131,6 +131,7 @@ struct Configuration {
   bool emitBitcodeBundle = false;
   bool emitDataInCodeInfo = false;
   bool emitEncryptionInfo = false;
+  bool emitInitOffsets = false;
   bool timeTraceEnabled = false;
   bool dataConst = false;
   bool dedupLiterals = true;

diff  --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp
index 7d9b06411818b..d22c3ea718c48 100644
--- a/lld/MachO/Driver.cpp
+++ b/lld/MachO/Driver.cpp
@@ -1103,6 +1103,11 @@ static void gatherInputSections() {
         if (auto *isec = dyn_cast<ConcatInputSection>(subsection.isec)) {
           if (isec->isCoalescedWeak())
             continue;
+          if (config->emitInitOffsets &&
+              sectionType(isec->getFlags()) == S_MOD_INIT_FUNC_POINTERS) {
+            in.initOffsets->addInput(isec);
+            continue;
+          }
           isec->outSecOff = inputOrder++;
           if (!osec)
             osec = ConcatOutputSection::getOrCreateForInput(isec);
@@ -1432,6 +1437,7 @@ bool macho::link(ArrayRef<const char *> argsArr, llvm::raw_ostream &stdoutOS,
   config->emitBitcodeBundle = args.hasArg(OPT_bitcode_bundle);
   config->emitDataInCodeInfo =
       args.hasFlag(OPT_data_in_code_info, OPT_no_data_in_code_info, true);
+  config->emitInitOffsets = args.hasArg(OPT_init_offsets);
   config->icfLevel = getICFLevel(args);
   config->dedupLiterals =
       args.hasFlag(OPT_deduplicate_literals, OPT_icf_eq, false) ||

diff  --git a/lld/MachO/InputSection.h b/lld/MachO/InputSection.h
index 0f79bdfd22648..8946724e2d984 100644
--- a/lld/MachO/InputSection.h
+++ b/lld/MachO/InputSection.h
@@ -314,6 +314,7 @@ constexpr const char functionStarts[] = "__func_starts";
 constexpr const char got[] = "__got";
 constexpr const char header[] = "__mach_header";
 constexpr const char indirectSymbolTable[] = "__ind_sym_tab";
+constexpr const char initOffsets[] = "__init_offsets";
 constexpr const char const_[] = "__const";
 constexpr const char lazySymbolPtr[] = "__la_symbol_ptr";
 constexpr const char lazyBinding[] = "__lazy_binding";

diff  --git a/lld/MachO/MarkLive.cpp b/lld/MachO/MarkLive.cpp
index cc9afe181d0f3..ba7d215d9f0a4 100644
--- a/lld/MachO/MarkLive.cpp
+++ b/lld/MachO/MarkLive.cpp
@@ -279,11 +279,16 @@ void markLive() {
     // mod_init_funcs, mod_term_funcs sections
     if (sectionType(isec->getFlags()) == S_MOD_INIT_FUNC_POINTERS ||
         sectionType(isec->getFlags()) == S_MOD_TERM_FUNC_POINTERS) {
+      assert(!config->emitInitOffsets ||
+             sectionType(isec->getFlags()) != S_MOD_INIT_FUNC_POINTERS);
       marker->enqueue(isec, 0);
       continue;
     }
   }
 
+  for (ConcatInputSection *isec : in.initOffsets->inputs())
+    marker->enqueue(isec, 0);
+
   marker->markTransitively();
 }
 

diff  --git a/lld/MachO/Options.td b/lld/MachO/Options.td
index 2d2c58e3ada01..6af993a430cb6 100644
--- a/lld/MachO/Options.td
+++ b/lld/MachO/Options.td
@@ -1273,8 +1273,7 @@ def ignore_optimization_hints : Flag<["-"], "ignore_optimization_hints">,
     HelpText<"Ignore Linker Optimization Hints">,
     Group<grp_undocumented>;
 def init_offsets : Flag<["-"], "init_offsets">,
-    HelpText<"This option is undocumented in ld64">,
-    Flags<[HelpHidden]>,
+    HelpText<"Store __TEXT segment offsets of static initializers">,
     Group<grp_undocumented>;
 def keep_dwarf_unwind : Flag<["-"], "keep_dwarf_unwind">,
     HelpText<"This option is undocumented in ld64">,

diff  --git a/lld/MachO/OutputSegment.cpp b/lld/MachO/OutputSegment.cpp
index da1394c088314..91770f58b805a 100644
--- a/lld/MachO/OutputSegment.cpp
+++ b/lld/MachO/OutputSegment.cpp
@@ -84,10 +84,11 @@ static int sectionOrder(OutputSection *osec) {
   // Sections are uniquely identified by their segment + section name.
   if (segname == segment_names::text) {
     return StringSwitch<int>(osec->name)
-        .Case(section_names::header, -4)
-        .Case(section_names::text, -3)
-        .Case(section_names::stubs, -2)
-        .Case(section_names::stubHelper, -1)
+        .Case(section_names::header, -5)
+        .Case(section_names::text, -4)
+        .Case(section_names::stubs, -3)
+        .Case(section_names::stubHelper, -2)
+        .Case(section_names::initOffsets, -1)
         .Case(section_names::unwindInfo, std::numeric_limits<int>::max() - 1)
         .Case(section_names::ehFrame, std::numeric_limits<int>::max())
         .Default(osec->inputOrder);

diff  --git a/lld/MachO/Symbols.h b/lld/MachO/Symbols.h
index c661913becf48..9d3b56a7ae269 100644
--- a/lld/MachO/Symbols.h
+++ b/lld/MachO/Symbols.h
@@ -346,6 +346,14 @@ T *replaceSymbol(Symbol *s, ArgT &&...arg) {
   return sym;
 }
 
+// Can a symbol's address only be resolved at runtime?
+inline bool needsBinding(const Symbol *sym) {
+  if (isa<DylibSymbol>(sym))
+    return true;
+  if (const auto *defined = dyn_cast<Defined>(sym))
+    return defined->isExternalWeakDef() || defined->interposable;
+  return false;
+}
 } // namespace macho
 
 std::string toString(const macho::Symbol &);

diff  --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp
index 1737484c83a93..9373e0b24fe0a 100644
--- a/lld/MachO/SyntheticSections.cpp
+++ b/lld/MachO/SyntheticSections.cpp
@@ -1816,6 +1816,74 @@ void ObjCImageInfoSection::writeTo(uint8_t *buf) const {
   write32le(buf + 4, flags);
 }
 
+InitOffsetsSection::InitOffsetsSection()
+    : SyntheticSection(segment_names::text, section_names::initOffsets) {
+  flags = S_INIT_FUNC_OFFSETS;
+}
+
+uint64_t InitOffsetsSection::getSize() const {
+  size_t count = 0;
+  for (const ConcatInputSection *isec : sections)
+    count += isec->relocs.size();
+  return count * sizeof(uint32_t);
+}
+
+void InitOffsetsSection::writeTo(uint8_t *buf) const {
+  uint64_t textVA = 0;
+  for (const OutputSegment *oseg : outputSegments)
+    if (oseg->name == segment_names::text) {
+      textVA = oseg->addr;
+      break;
+    }
+
+  // FIXME: Add function specified by -init when that argument is implemented.
+  for (ConcatInputSection *isec : sections) {
+    for (const Reloc &rel : isec->relocs) {
+      const Symbol *referent = rel.referent.dyn_cast<Symbol *>();
+      assert(referent && "section relocation should have been rejected");
+      uint64_t offset = referent->getVA() - textVA;
+      // FIXME: Can we handle this gracefully?
+      if (offset > UINT32_MAX)
+        fatal(isec->getLocation(rel.offset) + ": offset to initializer " +
+              referent->getName() + " (" + utohexstr(offset) +
+              ") does not fit in 32 bits");
+
+      // Entries need to be added in the order they appear in the section, but
+      // relocations aren't guaranteed to be sorted.
+      size_t index = rel.offset >> target->p2WordSize;
+      write32le(&buf[index * sizeof(uint32_t)], offset);
+    }
+    buf += isec->relocs.size() * sizeof(uint32_t);
+  }
+}
+
+// The inputs are __mod_init_func sections, which contain pointers to
+// initializer functions, therefore all relocations should be of the UNSIGNED
+// type. InitOffsetsSection stores offsets, so if the initializer's address is
+// not known at link time, stub-indirection has to be used.
+void InitOffsetsSection::setUp() {
+  for (const ConcatInputSection *isec : sections) {
+    for (const Reloc &rel : isec->relocs) {
+      RelocAttrs attrs = target->getRelocAttrs(rel.type);
+      if (!attrs.hasAttr(RelocAttrBits::UNSIGNED))
+        error(isec->getLocation(rel.offset) +
+              ": unsupported relocation type: " + attrs.name);
+      if (rel.addend != 0)
+        error(isec->getLocation(rel.offset) +
+              ": relocation addend is not representable in __init_offsets");
+      if (rel.referent.is<InputSection *>())
+        error(isec->getLocation(rel.offset) +
+              ": unexpected section relocation");
+
+      Symbol *sym = rel.referent.dyn_cast<Symbol *>();
+      if (auto *undefined = dyn_cast<Undefined>(sym))
+        treatUndefinedSymbol(*undefined, isec, rel.offset);
+      if (needsBinding(sym))
+        in.stubs->addEntry(sym);
+    }
+  }
+}
+
 void macho::createSyntheticSymbols() {
   auto addHeaderSymbol = [](const char *name) {
     symtab->addSynthetic(name, in.header->isec, /*value=*/0,

diff  --git a/lld/MachO/SyntheticSections.h b/lld/MachO/SyntheticSections.h
index 9b158dbd515d1..ac9bde62e6696 100644
--- a/lld/MachO/SyntheticSections.h
+++ b/lld/MachO/SyntheticSections.h
@@ -647,6 +647,32 @@ class ObjCImageInfoSection final : public SyntheticSection {
   std::vector<const InputFile *> files; // files with image info
 };
 
+// This section stores 32-bit __TEXT segment offsets of initializer functions.
+//
+// The compiler stores pointers to initializers in __mod_init_func. These need
+// to be fixed up at load time, which takes time and dirties memory. By
+// synthesizing InitOffsetsSection from them, this data can live in the
+// read-only __TEXT segment instead. This section is used by default when
+// chained fixups are enabled.
+//
+// There is no similar counterpart to __mod_term_func, as that section is
+// deprecated, and static destructors are instead handled by registering them
+// via __cxa_atexit from an autogenerated initializer function (see D121736).
+class InitOffsetsSection final : public SyntheticSection {
+public:
+  InitOffsetsSection();
+  bool isNeeded() const override { return !sections.empty(); }
+  uint64_t getSize() const override;
+  void writeTo(uint8_t *buf) const override;
+  void setUp();
+
+  void addInput(ConcatInputSection *isec) { sections.push_back(isec); }
+  const std::vector<ConcatInputSection *> &inputs() const { return sections; }
+
+private:
+  std::vector<ConcatInputSection *> sections;
+};
+
 struct InStruct {
   const uint8_t *bufferStart = nullptr;
   MachHeaderSection *header = nullptr;
@@ -668,6 +694,7 @@ struct InStruct {
   UnwindInfoSection *unwindInfo = nullptr;
   ObjCImageInfoSection *objCImageInfo = nullptr;
   ConcatInputSection *imageLoaderCache = nullptr;
+  InitOffsetsSection *initOffsets = nullptr;
 };
 
 extern InStruct in;

diff  --git a/lld/MachO/Writer.cpp b/lld/MachO/Writer.cpp
index e8bcf8cd96445..01054fe773b1f 100644
--- a/lld/MachO/Writer.cpp
+++ b/lld/MachO/Writer.cpp
@@ -575,15 +575,6 @@ void Writer::treatSpecialUndefineds() {
   }
 }
 
-// Can a symbol's address can only be resolved at runtime?
-static bool needsBinding(const Symbol *sym) {
-  if (isa<DylibSymbol>(sym))
-    return true;
-  if (const auto *defined = dyn_cast<Defined>(sym))
-    return defined->isExternalWeakDef() || defined->interposable;
-  return false;
-}
-
 static void prepareSymbolRelocation(Symbol *sym, const InputSection *isec,
                                     const lld::macho::Reloc &r) {
   assert(sym->isLive());
@@ -1141,6 +1132,8 @@ template <class LP> void Writer::run() {
   if (in.objcStubs->isNeeded())
     in.objcStubs->setUp();
   scanRelocations();
+  if (in.initOffsets->isNeeded())
+    in.initOffsets->setUp();
 
   // Do not proceed if there was an undefined symbol.
   reportPendingUndefinedSymbols();
@@ -1204,6 +1197,7 @@ void macho::createSyntheticSections() {
   in.objcStubs = make<ObjCStubsSection>();
   in.unwindInfo = makeUnwindInfoSection();
   in.objCImageInfo = make<ObjCImageInfoSection>();
+  in.initOffsets = make<InitOffsetsSection>();
 
   // This section contains space for just a single word, and will be used by
   // dyld to cache an address to the image loader it uses.

diff  --git a/lld/test/MachO/init-offsets.s b/lld/test/MachO/init-offsets.s
new file mode 100644
index 0000000000000..9a27033adfdfe
--- /dev/null
+++ b/lld/test/MachO/init-offsets.s
@@ -0,0 +1,73 @@
+# REQUIRES: x86
+# RUN: rm -rf %t; split-file %s %t
+
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/first.s -o %t/first.o
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/second.s -o %t/second.o
+
+# RUN: %lld -lSystem -init_offsets -undefined dynamic_lookup  %t/first.o %t/second.o -o %t/out
+# RUN: llvm-otool -lv %t/out | FileCheck --check-prefix=FLAGS --implicit-check-not=__mod_init_func %s
+# RUN: llvm-otool -l %t/out > %t/dump.txt
+# RUN: llvm-objdump --macho --print-imm-hex --section=__TEXT,__stubs %t/out >> %t/dump.txt
+# RUN: llvm-objdump --macho --syms %t/out >> %t/dump.txt
+# RUN: llvm-objcopy --dump-section=__TEXT,__init_offsets=%t/section.bin %t/out
+# RUN: echo "__TEXT,__init_offsets contents:" >> %t/dump.txt
+# RUN: od -An -txI %t/section.bin >> %t/dump.txt
+# RUN: FileCheck --check-prefix=CONTENT %s < %t/dump.txt
+
+## This test checks that:
+## - __mod_init_func is replaced by __init_offsets.
+## - __init_offsets has type S_INIT_FUNC_OFFSETS.
+## - initializers show up in the order their parent objects are specified on the
+##   command line, and in the order they show up within __mod_init_func.
+## - for undefined and dylib symbols, stubs are created, and the offsets point to those.
+## - offsets are relative to __TEXT's address, they aren't an absolute virtual address.
+
+# FLAGS:      sectname __init_offsets
+# FLAGS-NEXT:  segname __TEXT
+# FLAGS-NEXT:     addr
+# FLAGS-NEXT:     size 0x0000000000000010
+# FLAGS-NEXT:   offset
+# FLAGS-NEXT:    align
+# FLAGS-NEXT:   reloff 0
+# FLAGS-NEXT:   nreloc 0
+# FLAGS-NEXT:     type S_INIT_FUNC_OFFSETS
+
+# CONTENT:      segname __TEXT
+# CONTENT-NEXT: 0x[[#%x, TEXT:]]
+
+# CONTENT:      Contents of (__TEXT,__stubs) section
+# CONTENT-NEXT: [[#%x, ISNAN:]]: {{.*}} ## literal pool symbol address: ___isnan
+# CONTENT-NEXT: [[#%x, UNDEF:]]: {{.*}} ## literal pool symbol address: _undefined
+
+# CONTENT: SYMBOL TABLE:
+# CONTENT: [[#%x, FIRST:]]  g F __TEXT,__text _first_init
+# CONTENT: [[#%x, SECOND:]] g F __TEXT,__text _second_init
+
+# CONTENT: __TEXT,__init_offsets contents:
+# CONTENT: [[#%.8x, FIRST - TEXT]] [[#%.8x, ISNAN - TEXT]] [[#%.8x, UNDEF - TEXT]] [[#%.8x, SECOND - TEXT]]
+
+#--- first.s
+.globl _first_init, ___isnan, _main
+.text
+_first_init:
+  ret
+_main:
+  ret
+
+.section __DATA,__mod_init_func,mod_init_funcs
+.quad _first_init
+.quad ___isnan
+
+.subsections_via_symbols
+
+#--- second.s
+.globl _second_init, _undefined
+.text
+_second_init:
+  ret
+
+.section __DATA,__mod_init_func,mod_init_funcs
+.quad _undefined
+.quad _second_init
+
+.subsections_via_symbols

diff  --git a/llvm/include/llvm/BinaryFormat/MachO.h b/llvm/include/llvm/BinaryFormat/MachO.h
index 81b315a59ea9e..8627ed68bf09c 100644
--- a/llvm/include/llvm/BinaryFormat/MachO.h
+++ b/llvm/include/llvm/BinaryFormat/MachO.h
@@ -175,8 +175,11 @@ enum SectionType : uint32_t {
   /// S_THREAD_LOCAL_INIT_FUNCTION_POINTERS - Section with thread local
   /// variable initialization pointers to functions.
   S_THREAD_LOCAL_INIT_FUNCTION_POINTERS = 0x15u,
+  /// S_INIT_FUNC_OFFSETS - Section with 32-bit offsets to initializer
+  /// functions.
+  S_INIT_FUNC_OFFSETS = 0x16u,
 
-  LAST_KNOWN_SECTION_TYPE = S_THREAD_LOCAL_INIT_FUNCTION_POINTERS
+  LAST_KNOWN_SECTION_TYPE = S_INIT_FUNC_OFFSETS
 };
 
 enum : uint32_t {

diff  --git a/llvm/lib/MC/MCSectionMachO.cpp b/llvm/lib/MC/MCSectionMachO.cpp
index 1c210fb0f4c87..f7eedac3f2d1a 100644
--- a/llvm/lib/MC/MCSectionMachO.cpp
+++ b/llvm/lib/MC/MCSectionMachO.cpp
@@ -62,6 +62,8 @@ static constexpr struct {
      StringLiteral("S_THREAD_LOCAL_VARIABLE_POINTERS")}, // 0x14
     {StringLiteral("thread_local_init_function_pointers"),
      StringLiteral("S_THREAD_LOCAL_INIT_FUNCTION_POINTERS")}, // 0x15
+    {StringLiteral("") /* linker-synthesized */,
+     StringLiteral("S_INIT_FUNC_OFFSETS")}, // 0x16
 };
 
 /// SectionAttrDescriptors - This is an array of descriptors for section

diff  --git a/llvm/tools/llvm-objdump/MachODump.cpp b/llvm/tools/llvm-objdump/MachODump.cpp
index 1146d36f5da29..f615fbf4d81c5 100644
--- a/llvm/tools/llvm-objdump/MachODump.cpp
+++ b/llvm/tools/llvm-objdump/MachODump.cpp
@@ -8955,6 +8955,8 @@ static void PrintSection(const char *sectname, const char *segname,
       outs() << " S_THREAD_LOCAL_VARIABLE_POINTERS\n";
     else if (section_type == MachO::S_THREAD_LOCAL_INIT_FUNCTION_POINTERS)
       outs() << " S_THREAD_LOCAL_INIT_FUNCTION_POINTERS\n";
+    else if (section_type == MachO::S_INIT_FUNC_OFFSETS)
+      outs() << " S_INIT_FUNC_OFFSETS\n";
     else
       outs() << format("0x%08" PRIx32, section_type) << "\n";
     outs() << "attributes";


        


More information about the llvm-commits mailing list