[lld] ce2ae38 - [lld-macho] Deduplicate the `__objc_classrefs` section contents

Jez Ng via llvm-commits llvm-commits at lists.llvm.org
Tue Mar 8 05:34:19 PST 2022


Author: Jez Ng
Date: 2022-03-08T08:34:04-05:00
New Revision: ce2ae381246df89e560c0dfd0a7fdf275f266d9e

URL: https://github.com/llvm/llvm-project/commit/ce2ae381246df89e560c0dfd0a7fdf275f266d9e
DIFF: https://github.com/llvm/llvm-project/commit/ce2ae381246df89e560c0dfd0a7fdf275f266d9e.diff

LOG: [lld-macho] Deduplicate the `__objc_classrefs` section contents

ld64 splits `__objc_classrefs` into word-sized entries and deduplicates
them. This greatly reduces the number of bind entries emitted (and
therefore the amount of work `dyld` has to do at runtime). LLD performs
this deduplication as part of ICF, so it only takes effect when ICF is
enabled. For chromium_framework, this change to LLD cuts the number of
(non-lazy) binds from 912 to 190, getting us to parity with ld64 in this
respect.

Reviewed By: #lld-macho, thakis

Differential Revision: https://reviews.llvm.org/D121053

Added: 
    lld/test/MachO/objc-classrefs-dedup.s

Modified: 
    lld/MachO/ICF.cpp
    lld/MachO/InputFiles.cpp
    lld/MachO/InputSection.cpp
    lld/MachO/InputSection.h

Removed: 
    


################################################################################
diff  --git a/lld/MachO/ICF.cpp b/lld/MachO/ICF.cpp
index 7e23d067b8130..16ce44f21eaa2 100644
--- a/lld/MachO/ICF.cpp
+++ b/lld/MachO/ICF.cpp
@@ -374,7 +374,8 @@ void macho::foldIdenticalSections() {
   uint64_t icfUniqueID = inputSections.size();
   for (ConcatInputSection *isec : inputSections) {
     // FIXME: consider non-code __text sections as hashable?
-    bool isHashable = (isCodeSection(isec) || isCfStringSection(isec)) &&
+    bool isHashable = (isCodeSection(isec) || isCfStringSection(isec) ||
+                       isClassRefsSection(isec)) &&
                       !isec->shouldOmitFromOutput() &&
                       sectionType(isec->getFlags()) == MachO::S_REGULAR;
     if (isHashable) {
@@ -392,7 +393,7 @@ void macho::foldIdenticalSections() {
     // information gets recorded in our Reloc structs.) We therefore create a
     // mutable copy of the CFString and zero out the embedded addends before
     // performing any hashing / equality checks.
-    if (isCfStringSection(isec)) {
+    if (isCfStringSection(isec) || isClassRefsSection(isec)) {
       MutableArrayRef<uint8_t> copy = isec->data.copy(bAlloc());
       for (const Reloc &r : isec->relocs)
         target->relocateOne(copy.data() + r.offset, r, /*va=*/0, /*relocVA=*/0);

diff  --git a/lld/MachO/InputFiles.cpp b/lld/MachO/InputFiles.cpp
index ac329536dc426..e4508b22a6eba 100644
--- a/lld/MachO/InputFiles.cpp
+++ b/lld/MachO/InputFiles.cpp
@@ -252,13 +252,17 @@ InputFile::InputFile(Kind kind, const InterfaceFile &interface)
 // Note that "record" is a term I came up with. In contrast, "literal" is a term
 // used by the Mach-O format.
 static Optional<size_t> getRecordSize(StringRef segname, StringRef name) {
-  if (name == section_names::cfString) {
-    if (config->icfLevel != ICFLevel::none && segname == segment_names::data)
-      return target->wordSize == 8 ? 32 : 16;
-  } else if (name == section_names::compactUnwind) {
-    if (segname == segment_names::ld)
-      return target->wordSize == 8 ? 32 : 20;
+  if (name == section_names::compactUnwind) {
+      if (segname == segment_names::ld)
+        return target->wordSize == 8 ? 32 : 20;
   }
+  if (config->icfLevel == ICFLevel::none)
+    return {};
+
+  if (name == section_names::cfString && segname == segment_names::data)
+    return target->wordSize == 8 ? 32 : 16;
+  if (name == section_names::objcClassRefs && segname == segment_names::data)
+    return target->wordSize;
   return {};
 }
 

diff  --git a/lld/MachO/InputSection.cpp b/lld/MachO/InputSection.cpp
index 9c523f5afc8ad..3685ec69820c4 100644
--- a/lld/MachO/InputSection.cpp
+++ b/lld/MachO/InputSection.cpp
@@ -261,6 +261,11 @@ bool macho::isCfStringSection(const InputSection *isec) {
          isec->getSegName() == segment_names::data;
 }
 
+bool macho::isClassRefsSection(const InputSection *isec) {
+  return isec->getName() == section_names::objcClassRefs &&
+         isec->getSegName() == segment_names::data;
+}
+
 std::string lld::toString(const InputSection *isec) {
   return (toString(isec->getFile()) + ":(" + isec->getName() + ")").str();
 }

diff  --git a/lld/MachO/InputSection.h b/lld/MachO/InputSection.h
index bd9db5f432694..68d8ed97b11fe 100644
--- a/lld/MachO/InputSection.h
+++ b/lld/MachO/InputSection.h
@@ -268,8 +268,8 @@ inline bool isWordLiteralSection(uint32_t flags) {
 }
 
 bool isCodeSection(const InputSection *);
-
 bool isCfStringSection(const InputSection *);
+bool isClassRefsSection(const InputSection *);
 
 extern std::vector<ConcatInputSection *> inputSections;
 
@@ -306,6 +306,7 @@ constexpr const char moduleTermFunc[] = "__mod_term_func";
 constexpr const char nonLazySymbolPtr[] = "__nl_symbol_ptr";
 constexpr const char objcCatList[] = "__objc_catlist";
 constexpr const char objcClassList[] = "__objc_classlist";
+constexpr const char objcClassRefs[] = "__objc_classrefs";
 constexpr const char objcConst[] = "__objc_const";
 constexpr const char objcImageInfo[] = "__objc_imageinfo";
 constexpr const char objcNonLazyCatList[] = "__objc_nlcatlist";

diff  --git a/lld/test/MachO/objc-classrefs-dedup.s b/lld/test/MachO/objc-classrefs-dedup.s
new file mode 100644
index 0000000000000..dcb6bd6b75a55
--- /dev/null
+++ b/lld/test/MachO/objc-classrefs-dedup.s
@@ -0,0 +1,53 @@
+# REQUIRES: x86
+# RUN: rm -rf %t; split-file %s %t
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/defs.s -o %t/defs.o
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/refs1.s -o %t/refs1.o
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/refs2.s -o %t/refs2.o
+# RUN: %lld -lSystem -dylib %t/defs.o -o %t/libdefs.dylib
+# RUN: %lld -lSystem -dylib --icf=all %t/refs1.o %t/refs2.o %t/libdefs.dylib -o %t/out
+# RUN: llvm-objdump --macho --section-headers --bind %t/out | FileCheck %s \
+# RUN:   --implicit-check-not __objc_classrefs
+
+## Check that we only have 3 (unique) entries
+# CHECK:      Sections:
+# CHECK-NEXT: Idx Name             Size
+# CHECK:          __objc_classrefs 00000018
+
+## And only two binds
+# CHECK:       Bind table:
+# CHECK-NEXT:  segment  section           address  type     addend dylib    symbol
+# CHECK-DAG:   __DATA   __objc_classrefs  {{.*}}   pointer       0 libdefs  _OBJC_CLASS_$_Bar
+# CHECK-DAG:   __DATA   __objc_classrefs  {{.*}}   pointer       0 libdefs  _OBJC_CLASS_$_Foo
+
+#--- defs.s
+.globl _OBJC_CLASS_$_Foo, _OBJC_CLASS_$_Bar
+.section __DATA,__objc_data
+_OBJC_CLASS_$_Foo:
+ .quad 123
+
+_OBJC_CLASS_$_Bar:
+ .quad 456
+
+.subsections_via_symbols
+
+#--- refs1.s
+.globl _OBJC_CLASS_$_Baz
+
+.section __DATA,__objc_data
+_OBJC_CLASS_$_Baz:
+ .quad 789
+
+.section __DATA,__objc_classrefs
+.quad _OBJC_CLASS_$_Foo
+.quad _OBJC_CLASS_$_Bar
+.quad _OBJC_CLASS_$_Baz
+.quad _OBJC_CLASS_$_Baz
+
+.subsections_via_symbols
+
+#--- refs2.s
+.section __DATA,__objc_classrefs
+.quad _OBJC_CLASS_$_Foo
+.quad _OBJC_CLASS_$_Bar
+
+.subsections_via_symbols


        


More information about the llvm-commits mailing list